CSY/reowolf Changeset - 61bb6b04e20b · Centrum Wiskunde & Informatica (CWI)

Changeset - 61bb6b04e20b

Parent rev.

Child rev.

[Not reviewed]

0 4 1

MH - 4 years ago 2021-12-10 00:04:00
contact@maxhenger.nl

Refactoring ParserType parser in anticipation of tuples

5 files changed with 141 insertions and 4 deletions:

src/protocol/ast.rs

src/protocol/parser/mod.rs

src/protocol/parser/pass_definitions.rs

src/protocol/parser/pass_definitions_types.rs

125

src/protocol/parser/tokens.rs

0 comments (0 inline, 0 general)

src/protocol/ast.rs

➞

Show inline comments

@@ @@ -327,116 +327,120 @@ pub struct ImportSymbols { @@
     pub this: ImportId,
     // Phase 1: parser
     pub span: InputSpan,
     pub module: Identifier,
     pub module_id: RootId,
     pub symbols: Vec<AliasedSymbol>,
+}
 #[derive(Debug, Clone)]
 pub struct Identifier {
     pub span: InputSpan,
     pub value: StringRef<'static>,
+}
 impl PartialEq for Identifier {
     fn eq(&self, other: &Self) -> bool {
         return self.value == other.value
+    }
+}
 impl Display for Identifier {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         write!(f, "{}", self.value.as_str())
+    }
+}
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ParserTypeVariant {
     // Special builtin, only usable by the compiler and not constructable by the
     // programmer
     Void,
     InputOrOutput,
     ArrayLike,
     IntegerLike,
     // Basic builtin
     Message,
     Bool,
     UInt8, UInt16, UInt32, UInt64,
     SInt8, SInt16, SInt32, SInt64,
     Character, String,
     // Literals (need to get concrete builtin type during typechecking)
     IntegerLiteral,
     // Marker for inference
     Inferred,
     // Builtins expecting one subsequent type
     Array,
     Input,
     Output,
     // Tuple: expecting any number of elements. Note that the parser type can
     // have one-valued tuples, these will be filtered out later during type
     // checking.
     Tuple(u32), // u32 = number of subsequent types
     // User-defined types
     PolymorphicArgument(DefinitionId, u32), // u32 = index into polymorphic variables
     Definition(DefinitionId, u32), // u32 = number of subsequent types in the type tree.
+}
 impl ParserTypeVariant {
     pub(crate) fn num_embedded(&self) -> usize {
         use ParserTypeVariant::*;
         match self {
             Void | IntegerLike |
             Message | Bool |
             UInt8 | UInt16 | UInt32 | UInt64 |
             SInt8 | SInt16 | SInt32 | SInt64 |
             Character | String | IntegerLiteral |
             Inferred | PolymorphicArgument(_, _) =>
 ,
             ArrayLike | InputOrOutput | Array | Input | Output =>
 ,
             Definition(_, num) => *num as usize,
+            Definition(_, num) | Tuple(num) => *num as usize,
+        }
+    }
+}
 /// ParserTypeElement is an element of the type tree. An element may be
 /// implicit, meaning that the user didn't specify the type, but it was set by
 /// the compiler.
 #[derive(Debug, Clone)]
 pub struct ParserTypeElement {
     pub element_span: InputSpan, // span of this element, not including the child types
     pub variant: ParserTypeVariant,
+}
 /// ParserType is a specification of a type during the parsing phase and initial
 /// linker/validator phase of the compilation process. These types may be
 /// (partially) inferred or represent literals (e.g. a integer whose bytesize is
 /// not yet determined).
 ///
 /// Its contents are the depth-first serialization of the type tree. Each node
 /// is a type that may accept polymorphic arguments. The polymorphic arguments
 /// are then the children of the node.
 #[derive(Debug, Clone)]
 pub struct ParserType {
     pub elements: Vec<ParserTypeElement>,
     pub full_span: InputSpan,
+}
 impl ParserType {
     pub(crate) fn iter_embedded(&self, parent_idx: usize) -> ParserTypeIter {
         ParserTypeIter::new(&self.elements, parent_idx)
+    }
+}
 /// Iterator over the embedded elements of a specific element.
 pub struct ParserTypeIter<'a> {
     pub elements: &'a [ParserTypeElement],
     pub cur_embedded_idx: usize,
+}
 impl<'a> ParserTypeIter<'a> {
     fn new(elements: &'a [ParserTypeElement], parent_idx: usize) -> Self {
         debug_assert!(parent_idx < elements.len(), "parent index exceeds number of elements in ParserType");
         if elements[0].variant.num_embedded() == 0 {
             // Parent element does not have any embedded types, place
             // `cur_embedded_idx` at end so we will always return `None`
             Self{ elements, cur_embedded_idx: elements.len() }
         } else {
             // Parent element has an embedded type

src/protocol/parser/mod.rs

➞

Show inline comments

 pub(crate) mod symbol_table;
 pub(crate) mod type_table;
 pub(crate) mod tokens;
 pub(crate) mod token_parsing;
 pub(crate) mod pass_tokenizer;
 pub(crate) mod pass_symbols;
 pub(crate) mod pass_imports;
 pub(crate) mod pass_definitions;
 pub(crate) mod pass_definitions_types;
 pub(crate) mod pass_validation_linking;
 pub(crate) mod pass_typing;
 mod visitor;
 use tokens::*;
 use crate::collections::*;
 use visitor::Visitor;
 use pass_tokenizer::PassTokenizer;
 use pass_symbols::PassSymbols;
 use pass_imports::PassImport;
 use pass_definitions::PassDefinitions;
 use pass_validation_linking::PassValidationLinking;
 use pass_typing::{PassTyping, ResolveQueue};
 use symbol_table::*;
 use type_table::TypeTable;
 use crate::protocol::ast::*;
 use crate::protocol::input_source::*;
 use crate::protocol::ast_printer::ASTWriter;
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum ModuleCompilationPhase {
     Tokenized,              // source is tokenized
     SymbolsScanned,         // all definitions are linked to their type class
     ImportsResolved,        // all imports are added to the symbol table
     DefinitionsParsed,      // produced the AST for the entire module
     TypesAddedToTable,      // added all definitions to the type table
     ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
     // When we continue with the compiler:
     // Typed,                  // Type inference and checking has been performed
+}
 pub struct Module {
     // Buffers
     pub source: InputSource,
     pub tokens: TokenBuffer,
     // Identifiers
     pub root_id: RootId,
     pub name: Option<(PragmaId, StringRef<'static>)>,
     pub version: Option<(PragmaId, i64)>,
     pub phase: ModuleCompilationPhase,
+}
 // TODO: This is kind of wrong. Because when we're producing bytecode we would
 //       like the bytecode itself to not have the notion of the size of a pointer
 //       type. But until I figure out what we do want I'll just set everything
 //       to a 64-bit architecture.

src/protocol/parser/pass_definitions.rs

➞

Show inline comments

@@ @@ -415,98 +415,98 @@ impl PassDefinitions { @@
             } else if ident == KW_STMT_BREAK {
                 let id = self.consume_break_statement(module, iter, ctx)?;
                 section.push(id.upcast());
             } else if ident == KW_STMT_CONTINUE {
                 let id = self.consume_continue_statement(module, iter, ctx)?;
                 section.push(id.upcast());
             } else if ident == KW_STMT_SYNC {
                 let id = self.consume_synchronous_statement(module, iter, ctx)?;
                 section.push(id.upcast());
                 let end_sync = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement {
                     this,
                     start_sync: id,
                     next: StatementId::new_invalid()
                 });
                 section.push(end_sync.upcast());
                 let sync_stmt = &mut ctx.heap[id];
                 sync_stmt.end_sync = end_sync;
             } else if ident == KW_STMT_FORK {
                 let id = self.consume_fork_statement(module, iter, ctx)?;
                 section.push(id.upcast());
                 let end_fork = ctx.heap.alloc_end_fork_statement(|this| EndForkStatement{
                     this,
                     start_fork: id,
                     next: StatementId::new_invalid(),
                 });
                 section.push(end_fork.upcast());
                 let fork_stmt = &mut ctx.heap[id];
                 fork_stmt.end_fork = end_fork;
             } else if ident == KW_STMT_RETURN {
                 let id = self.consume_return_statement(module, iter, ctx)?;
                 section.push(id.upcast());
             } else if ident == KW_STMT_GOTO {
                 let id = self.consume_goto_statement(module, iter, ctx)?;
                 section.push(id.upcast());
             } else if ident == KW_STMT_NEW {
                 let id = self.consume_new_statement(module, iter, ctx)?;
                 section.push(id.upcast());
             } else if ident == KW_STMT_CHANNEL {
                 let id = self.consume_channel_statement(module, iter, ctx)?;
                 section.push(id.upcast().upcast());
             } else if iter.peek() == Some(TokenKind::Colon) {
                 self.consume_labeled_statement(module, iter, ctx, section)?;
             } else {
                 // Two fallback possibilities: the first one is a memory
                 // declaration, the other one is to parse it as a regular
                 // expression. This is a bit ugly
                 // declaration, the other one is to parse it as a normal
                 // expression. This is a bit ugly.
                 if let Some((memory_stmt_id, assignment_stmt_id)) = self.maybe_consume_memory_statement(module, iter, ctx)? {
                     section.push(memory_stmt_id.upcast().upcast());
                     section.push(assignment_stmt_id.upcast());
                 } else {
                     let id = self.consume_expression_statement(module, iter, ctx)?;
                     section.push(id.upcast());
+                }
+            }
         } else {
             let id = self.consume_expression_statement(module, iter, ctx)?;
             section.push(id.upcast());
+        }
         return Ok(());
+    }
     fn consume_block_statement(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
     ) -> Result<BlockStatementId, ParseError> {
         let open_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?;
         self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin)
+    }
     fn consume_block_statement_without_leading_curly(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, open_curly_pos: InputPosition
     ) -> Result<BlockStatementId, ParseError> {
         let mut stmt_section = self.statements.start_section();
         let mut next = iter.next();
         while next != Some(TokenKind::CloseCurly) {
             if next.is_none() {
                 return Err(ParseError::new_error_str_at_pos(
                     &module.source, iter.last_valid_pos(), "expected a statement or '}'"
                 ));
+            }
             self.consume_statement(module, iter, ctx, &mut stmt_section)?;
             next = iter.next();
+        }
         let statements = stmt_section.into_vec();
         let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?;
         block_span.begin = open_curly_pos;
         let id = ctx.heap.alloc_block_statement(|this| BlockStatement{
             this,
             is_implicit: false,
             span: block_span,
             statements,
             end_block: EndBlockStatementId::new_invalid(),
@@ @@ -1664,97 +1664,97 @@ impl PassDefinitions { @@
             let right = higher_precedence_fn(self, module, iter, ctx)?;
             let full_span = InputSpan::from_positions(
                 ctx.heap[left].full_span().begin,
                 ctx.heap[right].full_span().end,
             );
             result = ctx.heap.alloc_binary_expression(|this| BinaryExpression{
                 this, operator_span, full_span, left, operation, right,
                 parent: ExpressionParent::None,
                 unique_id_in_definition: -1,
             }).upcast();
+        }
         Ok(result)
+    }
     #[inline]
     fn consume_expression_list(
         &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, end_pos: Option<&mut InputPosition>
     ) -> Result<Vec<ExpressionId>, ParseError> {
         let mut section = self.expressions.start_section();
         consume_comma_separated(
             TokenKind::OpenParen, TokenKind::CloseParen, &module.source, iter, ctx,
             |_source, iter, ctx| self.consume_expression(module, iter, ctx),
             &mut section, "an expression", "a list of expressions", end_pos
         )?;
         Ok(section.into_vec())
+    }
+}
 /// Consumes a type. A type always starts with an identifier which may indicate
 /// a builtin type or a user-defined type. The fact that it may contain
 /// polymorphic arguments makes it a tree-like structure. Because we cannot rely
 /// on knowing the exact number of polymorphic arguments we do not check for
 /// these.
 ///
 /// Note that the first depth index is used as a hack.
 // TODO: @Optimize, @Cleanup
 fn consume_parser_type(
     source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
     cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, first_angle_depth: i32,
 ) -> Result<ParserType, ParseError> {
     struct Entry{
         element: ParserTypeElement,
         depth: i32,
+    }
-    // After parsing the array modified "[]", we need to insert an array type
+    // After parsing the array modifier "[]", we need to insert an array type
     // before the most recently parsed type.
     fn insert_array_before(elements: &mut Vec<Entry>, depth: i32, span: InputSpan) {
         let index = elements.iter().rposition(|e| e.depth == depth).unwrap();
         let num_embedded = elements[index].element.variant.num_embedded();
         elements.insert(index, Entry{
             element: ParserTypeElement{ element_span: span, variant: ParserTypeVariant::Array },
             depth,
         });
         // Now the original element, and all of its children, should have their
         // depth incremented by 1
         elements[index + 1].depth += 1;
         if num_embedded != 0 {
             for idx in index + 2..elements.len() {
                 let element = &mut elements[idx];
                 if element.depth >= depth + 1 {
                     element.depth += 1;
                 } else {
                     break;
+                }
+            }
+        }
+    }
     // Most common case we just have one type, perhaps with some array
     // annotations. This is both the hot-path, and simplifies the state machine
     // that follows and is responsible for parsing more complicated types.
     let element = consume_parser_type_ident(
         source, iter, symbols, heap, poly_vars, cur_scope,
         wrapping_definition, allow_inference
     )?;
     if iter.next() != Some(TokenKind::OpenAngle) {
         let num_embedded = element.variant.num_embedded();
         let first_pos = element.element_span.begin;
         let mut last_pos = element.element_span.end;
         let mut elements = Vec::with_capacity(num_embedded + 2); // type itself + embedded + 1 (maybe) array type
         // Consume any potential array elements
         while iter.next() == Some(TokenKind::OpenSquare) {
             let mut array_span = iter.next_span();
             iter.consume();
             let end_span = iter.next_span();
             array_span.end = end_span.end;
             consume_token(source, iter, TokenKind::CloseSquare)?;
             last_pos = end_span.end;

src/protocol/parser/pass_definitions_types.rs

➞

Show inline comments

@@ new file 100644 @@
 use crate::protocol::*;
 use crate::protocol::parser::*;
 struct Entry {
     element: ParserTypeElement,
     depth: i32,
+}
 #[derive(Copy, Clone)]
 enum DepthKind {
     Tuple, // because we had a `(` token
     PolyArgs, // because we had a `<` token
+}
 struct DepthElement {
     kind: DepthKind,
     pos: InputPosition,
+}
 /// Temporarily keep the error around in unevaluated format because sometimes
 /// when we evaluate a `ParserType`, we perform some backtracking if it fails.
 enum ParserTypeError {
     ExpectedAType(InputPosition),
     MessageAt(InputPosition, &'static str),
     TwoMessageAt(InputPosition, &'static str, InputPosition, &'static str),
+}
 /// Parsers tokens into `ParserType` instances (yes, the name of the struct is
 /// silly). Essentially a little state machine with its own temporary storage.
 pub(crate) struct ParserTypeParser {
     entries: Vec<Entry>,
     depths: Vec<DepthElement>,
     first_pos: InputPosition,
     last_pos: InputPosition,
+}
 impl ParserTypeParser {
     pub(crate) fn consume_parser_type(
         &mut self,
         source: &InputSource, iter: &mut TokenIter,
         symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
         cur_scope: SymbolScope, wrapping_definition: DefinitionId,
         allow_inference: bool, inside_angular_bracket: Option<InputPosition>,
     ) -> Result<ParserType, ParserTypeError> {
         // Make sure we start in an empty state
         debug_assert!(self.entries.is_empty());
         debug_assert!(self.depths.is_empty());
         // Setup processing
         if let Some(bracket_pos) = inside_angular_bracket {
             self.push_depth(DepthKind::PolyArgs, bracket_pos);
+        }
         let first_element = match iter.next() {
             Some(TokenKind::Ident) => {
             },
             Some(TokenKind::OpenParen) => {
                 let tuple_start_pos = iter.next_start_position();
                 self.push_depth(DepthKind::Tuple, tuple_start_pos);
                 self.entries.push(Entry{
                     element: ParserTypeElement{
                         element_span: InputSpan::from_positions(tuple_start_pos, tuple_start_pos),
                         variant: ParserTypeVariant::Tuple(0),
                     },
                     depth: self.cur_depth(),
                 });
                 iter.consume();
             },
             _ => return Err(ParserTypeError::MessageAt(iter.last_valid_pos(), "expected a type")),
         };
         // Convert the results from parsing into the `ParserType`
         let mut elements = Vec::with_capacity(self.entries.len());
         debug_assert!(!self.entries.is_empty());
         for entry in self.entries.drain(..) {
             elements.push(entry.element)
+        }
         return Ok(ParserType{
             elements,
             full_span: InputSpan::from_positions(self.first_pos, self.last_pos),
         });
+    }
     /// Consumes an identifier that should resolve to some kind of type. There
     /// may be trailing '::' tokens, commas, or polymorphic arguments.
     fn consume_parser_type_ident(
         &self, iter: &mut TokenIter, symbols: &SymbolTable, poly_vars: &[Identifier],
         allow_inference: bool,
     ) -> Result<ParserTypeElement, ParserTypeError> {
+    }
     #[inline]
     fn push_depth(&mut self, kind: DepthKind, pos: InputPosition) {
         self.depths.push(DepthElement{ kind, pos });
+    }
     #[inline]
     fn pop_depth(&mut self, kind: DepthKind, pos: InputPosition) -> Result<(), ParserTypeError> {
         if self.depths.is_empty() {
             // More closing parens than opening ones
             let message = match kind {
                 DepthKind::Tuple => "unmatched ')'",
                 DepthKind::PolyArgs => "unmatched '>'",
             };
             return Err(ParserTypeError::MessageAt(pos, message))
+        }
         let last = *self.depths.last().unwrap();
         if last != kind {
             // Wrong kind of paren
             let (message_second) = match kind {
                 DepthKind::Tuple => "unexpected closing"
+            }
+        }
+    }
     #[inline]
     fn cur_depth(&self) -> i32 {
         return self.depths.len() as i32;
+    }
+}
@@ \ No newline at end of file @@

src/protocol/parser/tokens.rs

➞

Show inline comments

@@ @@ -253,91 +253,98 @@ impl<'a> TokenIter<'a> { @@
         let token = &self.tokens[self.cur];
         Some(token.kind)
+    }
     /// Returns the next token (but skips over comments), or `None` if at the
     /// end of the range
     pub(crate) fn next(&mut self) -> Option<TokenKind> {
         while let Some(token_kind) = self.next_including_comments() {
             if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
                 return Some(token_kind);
+            }
             self.consume();
+        }
         return None
+    }
     /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
     /// skips over comments
     pub(crate) fn peek(&self) -> Option<TokenKind> {
         for next_idx in self.cur + 1..self.end {
             let next_kind = self.tokens[next_idx].kind;
             if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
                 return Some(next_kind);
+            }
+        }
         return None;
+    }
     /// Returns the start position belonging to the token returned by `next`. If
     /// there is not a next token, then we return the end position of the
     /// previous token.
     pub(crate) fn last_valid_pos(&self) -> InputPosition {
         if self.cur < self.end {
             // Return token position
             return self.tokens[self.cur].pos
+        }
         // Return previous token end
         let token = &self.tokens[self.cur - 1];
         return if token.kind == TokenKind::SpanEnd {
             token.pos
         } else {
             token.pos.with_offset(token.kind.num_characters())
         };
+    }
     /// Assumes the token is not at the end and returns the starting position
     /// belonging to the token returned by `next`.
     pub(crate) fn next_start_position(&self) -> InputPosition {
         debug_assert!(self.cur < self.end);
         return self.tokens[self.cur].pos;
+    }
     /// Returns the token range belonging to the token returned by `next`. This
     /// assumes that we're not at the end of the range we're iterating over.
     pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
         debug_assert!(self.cur < self.end);
         let token = &self.tokens[self.cur];
         if token.kind.has_span_end() {
             let span_end = &self.tokens[self.cur + 1];
             debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
             (token.pos, span_end.pos)
         } else {
             let offset = token.kind.num_characters();
             (token.pos, token.pos.with_offset(offset))
+        }
+    }
     /// See `next_positions`
     pub(crate) fn next_span(&self) -> InputSpan {
         let (begin, end) = self.next_positions();
         return InputSpan::from_positions(begin, end)
+    }
     /// Advances the iterator to the next (meaningful) token.
     pub(crate) fn consume(&mut self) {
         if let Some(kind) = self.next_including_comments() {
             if kind.has_span_end() {
                 self.cur += 2;
             } else {
                 self.cur += 1;
+            }
+        }
+    }
     /// Saves the current iteration position, may be passed to `load` to return
     /// the iterator to a previous position.
     pub(crate) fn save(&self) -> (usize, usize) {
         (self.cur, self.end)
+    }
     pub(crate) fn load(&mut self, saved: (usize, usize)) {
         self.cur = saved.0;
         self.end = saved.1;
+    }
+}
@@ \ No newline at end of file @@

0 comments (0 inline, 0 general)