CSY/reowolf Changeset - 3ebc282f2d0e · Centrum Wiskunde & Informatica (CWI)

Changeset - 3ebc282f2d0e

Parent rev.

Child rev.

[Not reviewed]

0 3 1

mh - 4 years ago 2021-05-31 11:13:54
contact@maxhenger.nl

String literal testing, fix string escaping bug

4 files changed with 84 insertions and 6 deletions:

src/protocol/parser/pass_tokenizer.rs

src/protocol/parser/token_parsing.rs

src/protocol/tests/mod.rs

src/protocol/tests/parser_literals.rs

0 comments (0 inline, 0 general)

src/protocol/parser/pass_tokenizer.rs

➞

Show inline comments

 use crate::protocol::input_source::{
     InputSource as InputSource,
     ParseError,
     InputPosition as InputPosition,
 };
 use super::tokens::*;
 use super::token_parsing::*;
 /// Tokenizer is a reusable parser to tokenize multiple source files using the
 /// same allocated buffers. In a well-formed program, we produce a consistent
 /// tree of token ranges such that we may identify tokens that represent a
 /// defintion or an import before producing the entire AST.
 ///
 /// If the program is not well-formed then the tree may be inconsistent, but we
 /// will detect this once we transform the tokens into the AST. To ensure a
 /// consistent AST-producing phase we will require the import to have balanced
 /// curly braces
 pub(crate) struct PassTokenizer {
     // Stack of input positions of opening curly braces, used to detect
     // unmatched opening braces, unmatched closing braces are detected
     // immediately.
     curly_stack: Vec<InputPosition>,
     // Points to an element in the `TokenBuffer.ranges` variable.
     stack_idx: usize,
+}
 impl PassTokenizer {
     pub(crate) fn new() -> Self {
         Self{
             curly_stack: Vec::with_capacity(32),
             stack_idx: 0
+        }
+    }
     pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         // Assert source and buffer are at start
         debug_assert_eq!(source.pos().offset, 0);
         debug_assert!(target.tokens.is_empty());
         debug_assert!(target.ranges.is_empty());
         // Set up for tokenization by pushing the first range onto the stack.
         // This range may get transformed into the appropriate range kind later,
         // see `push_range` and `pop_range`.
         self.stack_idx = 0;
         target.ranges.push(TokenRange{
             parent_idx: NO_RELATION,
             range_kind: TokenRangeKind::Module,
             curly_depth: 0,
             start: 0,
             end: 0,
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx: NO_RELATION,
         });
         // Main tokenization loop
         while let Some(c) = source.next() {
             let token_index = target.tokens.len() as u32;
             if is_char_literal_start(c) {
                 self.consume_char_literal(source, target)?;
             } else if is_string_literal_start(c) {
                 self.consume_string_literal(source, target)?;
             } else if is_identifier_start(c) {
                 let ident = self.consume_identifier(source, target)?;
                 if demarks_definition(ident) {
                     self.push_range(target, TokenRangeKind::Definition, token_index);
                 } else if demarks_import(ident) {
                     self.push_range(target, TokenRangeKind::Import, token_index);
+                }
             } else if is_integer_literal_start(c) {
                 self.consume_number(source, target)?;
             } else if is_pragma_start_or_pound(c) {
                 let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
                 if was_pragma {
                     self.push_range(target, TokenRangeKind::Pragma, token_index);
+                }
             } else if self.is_line_comment_start(c, source) {
                 self.consume_line_comment(source, target)?;
             } else if self.is_block_comment_start(c, source) {
                 self.consume_block_comment(source, target)?;
             } else if is_whitespace(c) {
                 let contained_newline = self.consume_whitespace(source);
                 if contained_newline {
                     let range = &target.ranges[self.stack_idx];
                     if range.range_kind == TokenRangeKind::Pragma {
                         self.pop_range(target, target.tokens.len() as u32);
+                    }
+                }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
                 if let Some((token, token_pos)) = was_punctuation {
                     if token == TokenKind::OpenCurly {
                         self.curly_stack.push(token_pos);
                     } else if token == TokenKind::CloseCurly {
                         // Check if this marks the end of a range we're
                         // currently processing
                         if self.curly_stack.is_empty() {
                             return Err(ParseError::new_error_str_at_pos(
                                 source, token_pos, "unmatched closing curly brace '}'"
                             ));
+                        }
                         self.curly_stack.pop();
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
                         // Exit early if we have more closing curly braces than
                         // opening curly braces
                     } else if token == TokenKind::SemiColon {
                         // Check if this marks the end of an import
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Import {
                             self.pop_range(target, target.tokens.len() as u32);
+                        }
+                    }
                 } else {
                     return Err(ParseError::new_error_str_at_pos(
                         source, source.pos(), "unexpected character"
                     ));
+                }
+            }
+        }
         // End of file, check if our state is correct
         if let Some(error) = source.had_error.take() {
             return Err(error);
+        }
         if !self.curly_stack.is_empty() {
             // Let's not add a lot of heuristics and just tell the programmer
             // that something is wrong
             let last_unmatched_open = self.curly_stack.pop().unwrap();
             return Err(ParseError::new_error_str_at_pos(
                 source, last_unmatched_open, "unmatched opening curly brace '{'"
             ));
+        }
         // Ranges that did not depend on curly braces may have missing tokens.
         // So close all of the active tokens
         while self.stack_idx != 0 {
             self.pop_range(target, target.tokens.len() as u32);
+        }
         // And finally, we may have a token range at the end that doesn't belong
         // to a range yet, so insert a "code" range if this is the case.
         debug_assert_eq!(self.stack_idx, 0);
         let last_registered_idx = target.ranges[0].end;
         let last_token_idx = target.tokens.len() as u32;
         if last_registered_idx != last_token_idx {
             self.add_code_range(target, 0, last_registered_idx, last_token_idx, NO_RELATION);
+        }
         // TODO: @remove once I'm sure the algorithm works. For now it is better
         //  if the debugging is a little more expedient
         if cfg!(debug_assertions) {
             // For each range make sure its children make sense
             for parent_idx in 0..target.ranges.len() {
                 let cur_range = &target.ranges[parent_idx];
                 if cur_range.num_child_ranges == 0 {
                     assert_eq!(cur_range.first_child_idx, NO_RELATION);
                     assert_eq!(cur_range.last_child_idx, NO_RELATION);
                 } else {
                     assert_ne!(cur_range.first_child_idx, NO_RELATION);
                     assert_ne!(cur_range.last_child_idx, NO_RELATION);
                     let mut child_counter = 0u32;
                     let mut last_valid_child_idx = cur_range.first_child_idx;
                     let mut child_idx = cur_range.first_child_idx;
                     while child_idx != NO_RELATION {
                         let child_range = &target.ranges[child_idx as usize];
                         assert_eq!(child_range.parent_idx, parent_idx as i32);
                         last_valid_child_idx = child_idx;
                         child_idx = child_range.next_sibling_idx;
                         child_counter += 1;
+                    }
                     assert_eq!(cur_range.last_child_idx, last_valid_child_idx);
                     assert_eq!(cur_range.num_child_ranges, child_counter);
+                }
+            }
+        }
         Ok(())
+    }
     fn is_line_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'/') == source.lookahead(1);
+    }
     fn is_block_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
         return first_char == b'/' && Some(b'*') == source.lookahead(1);
+    }
     fn maybe_parse_punctuation(
         &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
     ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
         debug_assert!(first_char != b'#', "'#' needs special handling");
         debug_assert!(first_char != b'\'', "'\'' needs special handling");
         debug_assert!(first_char != b'"', "'\"' needs special handling");
         let pos = source.pos();
         let token_kind;
         if first_char == b'!' {
             source.consume();
             if Some(b'=') == source.next() {
                 source.consume();
                 token_kind = TokenKind::NotEqual;
             } else {
                 token_kind = TokenKind::Exclamation;
+            }
         } else if first_char == b'%' {
             source.consume();
             if Some(b'=') == source.next() {
                 source.consume();
                 token_kind = TokenKind::PercentEquals;
             } else {
                 token_kind = TokenKind::Percent;
+            }
         } else if first_char == b'&' {
             source.consume();
             let next = source.next();
             if Some(b'&') == next {
                 source.consume();
                 token_kind = TokenKind::AndAnd;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::AndEquals;
             } else {
                 token_kind = TokenKind::And;
+            }
         } else if first_char == b'(' {
             source.consume();
             token_kind = TokenKind::OpenParen;
         } else if first_char == b')' {
             source.consume();
             token_kind = TokenKind::CloseParen;
         } else if first_char == b'*' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::StarEquals;
             } else {
                 token_kind = TokenKind::Star;
+            }
         } else if first_char == b'+' {
             source.consume();
             let next = source.next();
             if Some(b'+') == next {
                 source.consume();
                 token_kind = TokenKind::PlusPlus;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::PlusEquals;
             } else {
                 token_kind = TokenKind::Plus;
+            }
         } else if first_char == b',' {
             source.consume();
             token_kind = TokenKind::Comma;
         } else if first_char == b'-' {
             source.consume();
             let next = source.next();
             if Some(b'-') == next {
                 source.consume();
                 token_kind = TokenKind::MinusMinus;
             } else if Some(b'>') == next {
                 source.consume();
                 token_kind = TokenKind::ArrowRight;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::MinusEquals;
             } else {
                 token_kind = TokenKind::Minus;
+            }
         } else if first_char == b'.' {
             source.consume();
             if let Some(b'.') = source.next() {
                 source.consume();
                 token_kind = TokenKind::DotDot;
             } else {
                 token_kind = TokenKind::Dot
+            }
         } else if first_char == b'/' {
             source.consume();
             debug_assert_ne!(Some(b'/'), source.next());
             debug_assert_ne!(Some(b'*'), source.next());
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::SlashEquals;
             } else {
                 token_kind = TokenKind::Slash;
+            }
         } else if first_char == b':' {
             source.consume();
             if let Some(b':') = source.next() {
                 source.consume();
                 token_kind = TokenKind::ColonColon;
             } else {
                 token_kind = TokenKind::Colon;
+            }
         } else if first_char == b';' {
             source.consume();
             token_kind = TokenKind::SemiColon;
         } else if first_char == b'<' {
             source.consume();
             let next = source.next();
             if let Some(b'<') = next {
                 source.consume();
                 if let Some(b'=') = source.next() {
                     source.consume();
                     token_kind = TokenKind::ShiftLeftEquals;
                 } else {
                     token_kind = TokenKind::ShiftLeft;
+                }
             } else if let Some(b'=') = next {
                 source.consume();
                 token_kind = TokenKind::LessEquals;
             } else {
                 token_kind = TokenKind::OpenAngle;
+            }
         } else if first_char == b'=' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::EqualEqual;
             } else {
                 token_kind = TokenKind::Equal;
+            }
         } else if first_char == b'>' {
             source.consume();
             let next = source.next();
             if Some(b'>') == next {
                 source.consume();
                 if Some(b'=') == source.next() {
                     source.consume();
                     token_kind = TokenKind::ShiftRightEquals;
                 } else {
                     token_kind = TokenKind::ShiftRight;
+                }
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::GreaterEquals;
             } else {
                 token_kind = TokenKind::CloseAngle;
+            }
         } else if first_char == b'?' {
             source.consume();
             token_kind = TokenKind::Question;
         } else if first_char == b'@' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::AtEquals;
             } else {
                 token_kind = TokenKind::At;
+            }
         } else if first_char == b'[' {
             source.consume();
             token_kind = TokenKind::OpenSquare;
         } else if first_char == b']' {
             source.consume();
             token_kind = TokenKind::CloseSquare;
         } else if first_char == b'^' {
             source.consume();
             if let Some(b'=') = source.next() {
                 source.consume();
                 token_kind = TokenKind::CaretEquals;
             } else {
                 token_kind = TokenKind::Caret;
+            }
         } else if first_char == b'{' {
             source.consume();
             token_kind = TokenKind::OpenCurly;
         } else if first_char == b'|' {
             source.consume();
             let next = source.next();
             if Some(b'|') == next {
                 source.consume();
                 token_kind = TokenKind::OrOr;
             } else if Some(b'=') == next {
                 source.consume();
                 token_kind = TokenKind::OrEquals;
             } else {
                 token_kind = TokenKind::Or;
+            }
         } else if first_char == b'}' {
             source.consume();
             token_kind = TokenKind::CloseCurly;
         } else if first_char == b'~' {
             source.consume();
             token_kind = TokenKind::Tilde;
         } else {
             self.check_ascii(source)?;
             return Ok(None);
+        }
         target.tokens.push(Token::new(token_kind, pos));
         Ok(Some((token_kind, pos)))
+    }
     fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading quote
         debug_assert!(source.next().unwrap() == b'\'');
         source.consume();
         let mut prev_char = b'\'';
         while let Some(c) = source.next() {
             if !c.is_ascii() {
                 return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in char literal"));
+            }
             source.consume();
             // Make sure ending quote was not escaped
             if c == b'\'' && prev_char != b'\\' {
                 prev_char = c;
                 break;
+            }
             prev_char = c;
+        }
         if prev_char != b'\'' {
             // Unterminated character literal, reached end of file.
             return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated character literal"));
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Character, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_string_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading double quotes
         debug_assert!(source.next().unwrap() == b'"');
         source.consume();
         let mut prev_char = b'"';
         while let Some(c) = source.next() {
             if !c.is_ascii() {
                 return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal"));
+            }
             source.consume();
             if c == b'"' && prev_char != b'\\' {
                 // Unescaped string terminator
                 prev_char = c;
                 break;
+            }
             if prev_char == b'\\' && c == b'\\' {
                 // Escaped backslash, set prev_char to bogus to not conflict
                 // with escaped-" and unterminated string literal detection.
                 prev_char = b'\0';
             } else {
                 prev_char = c;
+            }
+        }
         if prev_char != b'"' {
             // Unterminated string literal
             return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::String, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
         let start_pos = source.pos();
         debug_assert_eq!(first_char, b'#');
         source.consume();
         let next = source.next();
         if next.is_none() || !is_identifier_start(next.unwrap()) {
             // Just a pound sign
             target.tokens.push(Token::new(TokenKind::Pound, start_pos));
             Ok(false)
         } else {
             // Pound sign followed by identifier
             source.consume();
             while let Some(c) = source.next() {
                 if !is_identifier_remaining(c) {
                     break;
+                }
                 source.consume();
+            }
             self.check_ascii(source)?;
             let end_pos = source.pos();
             target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
             target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
             Ok(true)
+        }
+    }
     fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "//"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
         source.consume();
         source.consume();
         let mut prev_char = b'/';
         let mut cur_char = b'/';
         while let Some(c) = source.next() {
             prev_char = cur_char;
             cur_char = c;
             if c == b'\n' {
                 // End of line, note that the newline is not consumed
                 break;
+            }
             source.consume();
+        }
         let mut end_pos = source.pos();
         debug_assert_eq!(begin_pos.line, end_pos.line);
         // Modify offset to not include the newline characters
         if cur_char == b'\n' {
             if prev_char == b'\r' {
                 end_pos.offset -= 2;
             } else {
                 end_pos.offset -= 1;
+            }
             // Consume final newline
             source.consume();
         } else {
             // End of comment was due to EOF
             debug_assert!(source.next().is_none())
+        }
         target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         // Consume the leading "/*"
         debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
         source.consume();
         source.consume();
         // Explicitly do not put prev_char at "*", because then "/*/" would
         // represent a valid and closed block comment
         let mut prev_char = b' ';
         let mut is_closed = false;
         while let Some(c) = source.next() {
             source.consume();
             if prev_char == b'*' && c == b'/' {
                 // End of block comment
                 is_closed = true;
                 break;
+            }
             prev_char = c;
+        }
         if !is_closed {
             return Err(ParseError::new_error_str_at_pos(
                 source, source.pos(), "encountered unterminated block comment")
             );
+        }
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_identifier_start(source.next().unwrap()));
         source.consume();
         // Keep reading until no more identifier
         while let Some(c) = source.next() {
             if !is_identifier_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(source.section_at_pos(begin_pos, end_pos))
+    }
     fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         let begin_pos = source.pos();
         debug_assert!(is_integer_literal_start(source.next().unwrap()));
         source.consume();
         // Keep reading until it doesn't look like a number anymore
         while let Some(c) = source.next() {
             if !maybe_number_remaining(c) {
                 break;
+            }
             source.consume();
+        }
         self.check_ascii(source)?;
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
         Ok(())
+    }
     // Consumes whitespace and returns whether or not the whitespace contained
     // a newline.
     fn consume_whitespace(&self, source: &mut InputSource) -> bool {
         debug_assert!(is_whitespace(source.next().unwrap()));
         let mut has_newline = false;
         while let Some(c) = source.next() {
             if !is_whitespace(c) {
                 break;
+            }
             if c == b'\n' {
                 has_newline = true;
+            }
             source.consume();
+        }
         has_newline
+    }
     /// Appends a `Code` range covering the tokens `code_start_idx` up to
     /// `code_end_idx` as the newest child of the range at `parent_idx`.
     ///
     /// The parent's `end`, child count and first/last child pointers are
     /// updated, and the previously last child (if any) is linked to the new
     /// range. `next_sibling_idx` is stored in the new range as-is, so the
     /// caller may pass the index of a range it is about to push.
     fn add_code_range(
         &mut self, target: &mut TokenBuffer, parent_idx: i32,
         code_start_idx: u32, code_end_idx: u32, next_sibling_idx: i32
     ) {
         let new_range_idx = target.ranges.len() as i32;
         let parent_range = &mut target.ranges[parent_idx as usize];
         debug_assert_ne!(parent_range.end, code_end_idx, "called push_code_range without a need to do so");
         // A code range may be the very first child of its parent (e.g. a
         // module without any pragma, import or definition). Without this the
         // parent would have a child count of one but no first child.
         if parent_range.first_child_idx == NO_RELATION {
             parent_range.first_child_idx = new_range_idx;
         }
         let sibling_idx = parent_range.last_child_idx;
         parent_range.last_child_idx = new_range_idx;
         parent_range.end = code_end_idx;
         parent_range.num_child_ranges += 1;
         let curly_depth = self.curly_stack.len() as u32;
         target.ranges.push(TokenRange{
             parent_idx,
             range_kind: TokenRangeKind::Code,
             curly_depth,
             start: code_start_idx,
             end: code_end_idx,
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx,
         });
         // Fix up the sibling indices
         if sibling_idx != NO_RELATION {
             let sibling_range = &mut target.ranges[sibling_idx as usize];
             sibling_range.next_sibling_idx = new_range_idx;
         }
     }
     fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
         let new_range_idx = target.ranges.len() as i32;
         let parent_idx = self.stack_idx as i32;
         let parent_range = &mut target.ranges[self.stack_idx];
         if parent_range.first_child_idx == NO_RELATION {
             parent_range.first_child_idx = new_range_idx;
+        }
         let last_registered_idx = parent_range.end;
         if last_registered_idx != first_token_idx {
             self.add_code_range(target, parent_idx, last_registered_idx, first_token_idx, new_range_idx + 1);
+        }
         // Push the new range
         self.stack_idx = target.ranges.len();
         let curly_depth = self.curly_stack.len() as u32;
         target.ranges.push(TokenRange{
             parent_idx,
             range_kind,
             curly_depth,
             start: first_token_idx,
             end: first_token_idx, // modified when popped
             num_child_ranges: 0,
             first_child_idx: NO_RELATION,
             last_child_idx: NO_RELATION,
             next_sibling_idx: NO_RELATION
         })
+    }
     /// Closes the currently active range at token `end_token_idx` and makes
     /// its parent the active range again.
     ///
     /// The parent's `end`, child count and first/last child pointers are
     /// updated, and the parent's previously last child (if any) is linked to
     /// the popped range. The top-level range must never be popped.
     fn pop_range(&mut self, target: &mut TokenBuffer, end_token_idx: u32) {
         debug_assert!(self.stack_idx != 0, "attempting to pop top-level range");
         let popped_idx = self.stack_idx as i32;
         let popped_range = &mut target.ranges[self.stack_idx];
         // Fix up the current range before going back to parent
         popped_range.end = end_token_idx;
         debug_assert_ne!(popped_range.start, end_token_idx);
         // Go back to parent and fix up its child pointers, but remember the
         // last child, so we can link it to the newly popped range.
         self.stack_idx = popped_range.parent_idx as usize;
         let parent = &mut target.ranges[self.stack_idx];
         if parent.first_child_idx == NO_RELATION {
             parent.first_child_idx = popped_idx;
         }
         let prev_sibling_idx = parent.last_child_idx;
         parent.last_child_idx = popped_idx;
         parent.end = end_token_idx;
         parent.num_child_ranges += 1;
         // Fix up the sibling (if it exists)
         if prev_sibling_idx != NO_RELATION {
             let sibling = &mut target.ranges[prev_sibling_idx as usize];
             sibling.next_sibling_idx = popped_idx;
         }
     }
     fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> {
         match source.next() {
             Some(c) if !c.is_ascii() => {
                 Err(ParseError::new_error_str_at_pos(source, source.pos(), "encountered a non-ASCII character"))
             },
             _else => {
                 Ok(())
             },
+        }
+    }
+}
 // Helpers for characters
 fn demarks_definition(ident: &[u8]) -> bool {
     return
         ident == KW_STRUCT ||
             ident == KW_ENUM ||
             ident == KW_UNION ||
             ident == KW_FUNCTION ||
             ident == KW_PRIMITIVE ||
             ident == KW_COMPOSITE
+}
 fn demarks_import(ident: &[u8]) -> bool {
     return ident == KW_IMPORT;
+}
 /// Returns true if `c` is an ASCII whitespace byte as defined by
 /// `u8::is_ascii_whitespace`.
 fn is_whitespace(c: u8) -> bool {
     c.is_ascii_whitespace()
 }
 /// Returns true if `c` is the single quote that opens a character literal.
 fn is_char_literal_start(c: u8) -> bool {
     c == b'\''
 }
 /// Returns true if `c` is the double quote that opens a string literal.
 fn is_string_literal_start(c: u8) -> bool {
     c == b'"'
 }
 /// Returns true if `c` is the '#' that starts a pragma or is a lone pound
 /// sign.
 fn is_pragma_start_or_pound(c: u8) -> bool {
     c == b'#'
 }
 /// Returns true if `c` may start an identifier: an ASCII letter or '_'.
 fn is_identifier_start(c: u8) -> bool {
     c.is_ascii_alphabetic() || c == b'_'
 }
 /// Returns true if `c` may appear after the first character of an
 /// identifier: an ASCII letter, an ASCII digit or '_'.
 fn is_identifier_remaining(c: u8) -> bool {
     c.is_ascii_alphanumeric() || c == b'_'
 }
 /// Returns true if `c` may start an integer literal: an ASCII digit.
 fn is_integer_literal_start(c: u8) -> bool {
     c.is_ascii_digit()
 }
 /// Returns true if `c` may appear after the first character of an integer
 /// literal: a hexadecimal digit, a radix indicator or the '_' separator.
 fn maybe_number_remaining(c: u8) -> bool {
     // Note: hex range includes the possible binary indicator 'b' and 'B';
     match c {
         b'o' | b'O' | b'x' | b'X' | b'_' => true,
         _ => c.is_ascii_hexdigit(),
     }
 }
 #[cfg(test)]
 mod tests {
     use super::*;
     // TODO: Remove at some point
     // Smoke test: tokenizes a small sample program and prints the resulting
     // ranges and tokens. Apart from requiring tokenization to succeed, the
     // only thing checked is that every span-opening token is directly
     // followed by a `SpanEnd` token.
     #[test]
     fn test_tokenizer() {
         let mut source = InputSource::new_test("
         #version 500
         # hello 2
         import std.reo::*;
         struct Thing {
             int a: 5,
+        }
         enum Hello {
             A,
+            B
+        }
         // Hello hello, is it me you are looking for?
         // I can seee it in your eeeyes
         func something(int a, int b, int c) -> byte {
             int a = 5;
             struct Inner {
                 int a
+            }
             struct City {
                 int b
+            }
             /* Waza
             How are you doing
             Things in here yo
             /* */ */
             a = a + 5 * 2;
             struct Pressure {
                 int d
+            }
+        }
         ");
         let mut t = PassTokenizer::new();
         let mut buffer = TokenBuffer::new();
         t.tokenize(&mut source, &mut buffer).expect("tokenize");
         println!("Ranges:\n");
         for (idx, range) in buffer.ranges.iter().enumerate() {
             println!("[{}] {:?}", idx, range)
+        }
         println!("Tokens:\n");
         // Span-carrying tokens are stored as a (kind, SpanEnd) pair, so the
         // iterator is advanced by hand to fetch the second half of the pair.
         let mut iter = buffer.tokens.iter().enumerate();
         while let Some((idx, token)) = iter.next() {
             match token.kind {
                 TokenKind::Ident | TokenKind::Pragma | TokenKind::Integer |
                 TokenKind::String | TokenKind::Character | TokenKind::LineComment |
                 TokenKind::BlockComment => {
                     let (_, end) = iter.next().unwrap();
                     println!("[{}] {:?} ......", idx, token.kind);
                     assert_eq!(end.kind, TokenKind::SpanEnd);
                     let text = source.section_at_pos(token.pos, end.pos);
                     println!("{}", String::from_utf8_lossy(text));
                 },
                 _ => {
                     println!("[{}] {:?}", idx, token.kind);
+                }
+            }
+        }
+    }
+}
@@ \ No newline at end of file @@

src/protocol/parser/token_parsing.rs

➞

Show inline comments

 use crate::collections::ScopedSection;
 use crate::protocol::ast::*;
 use crate::protocol::input_source::{
     InputSource as InputSource,
     InputPosition as InputPosition,
     InputSpan,
     ParseError,
 };
 use super::tokens::*;
 use super::symbol_table::*;
 use super::{Module, PassCtx};
 // Reserved words of the language. They are stored as byte strings so that
 // they can be compared (and pattern-matched) against sections of source text.
 // Keywords - general
 pub(crate) const KW_LET:       &[u8] = b"let";
 pub(crate) const KW_AS:        &[u8] = b"as";
 pub(crate) const KW_STRUCT:    &[u8] = b"struct";
 pub(crate) const KW_ENUM:      &[u8] = b"enum";
 pub(crate) const KW_UNION:     &[u8] = b"union";
 pub(crate) const KW_FUNCTION:  &[u8] = b"func";
 pub(crate) const KW_PRIMITIVE: &[u8] = b"primitive";
 pub(crate) const KW_COMPOSITE: &[u8] = b"composite";
 pub(crate) const KW_IMPORT:    &[u8] = b"import";
 // Keywords - literal values
 pub(crate) const KW_LIT_TRUE:  &[u8] = b"true";
 pub(crate) const KW_LIT_FALSE: &[u8] = b"false";
 pub(crate) const KW_LIT_NULL:  &[u8] = b"null";
 // Keywords - builtin functions and function-like constructs
 pub(crate) const KW_CAST:        &[u8] = b"cast";
 pub(crate) const KW_FUNC_GET:    &[u8] = b"get";
 pub(crate) const KW_FUNC_PUT:    &[u8] = b"put";
 pub(crate) const KW_FUNC_FIRES:  &[u8] = b"fires";
 pub(crate) const KW_FUNC_CREATE: &[u8] = b"create";
 pub(crate) const KW_FUNC_LENGTH: &[u8] = b"length";
 pub(crate) const KW_FUNC_ASSERT: &[u8] = b"assert";
 // Keywords - statements
 pub(crate) const KW_STMT_CHANNEL:  &[u8] = b"channel";
 pub(crate) const KW_STMT_IF:       &[u8] = b"if";
 pub(crate) const KW_STMT_ELSE:     &[u8] = b"else";
 pub(crate) const KW_STMT_WHILE:    &[u8] = b"while";
 pub(crate) const KW_STMT_BREAK:    &[u8] = b"break";
 pub(crate) const KW_STMT_CONTINUE: &[u8] = b"continue";
 pub(crate) const KW_STMT_GOTO:     &[u8] = b"goto";
 pub(crate) const KW_STMT_RETURN:   &[u8] = b"return";
 pub(crate) const KW_STMT_SYNC:     &[u8] = b"synchronous";
 pub(crate) const KW_STMT_NEW:      &[u8] = b"new";
 // Keywords - types
 // Type names also show up in diagnostics presented to the user, hence each
 // one is first declared as a `str` and then re-exported as bytes below.
 pub(crate) const KW_TYPE_IN_PORT_STR:  &str = "in";
 pub(crate) const KW_TYPE_OUT_PORT_STR: &str = "out";
 pub(crate) const KW_TYPE_MESSAGE_STR:  &str = "msg";
 pub(crate) const KW_TYPE_BOOL_STR:     &str = "bool";
 pub(crate) const KW_TYPE_UINT8_STR:    &str = "u8";
 pub(crate) const KW_TYPE_UINT16_STR:   &str = "u16";
 pub(crate) const KW_TYPE_UINT32_STR:   &str = "u32";
 pub(crate) const KW_TYPE_UINT64_STR:   &str = "u64";
 pub(crate) const KW_TYPE_SINT8_STR:    &str = "s8";
 pub(crate) const KW_TYPE_SINT16_STR:   &str = "s16";
 pub(crate) const KW_TYPE_SINT32_STR:   &str = "s32";
 pub(crate) const KW_TYPE_SINT64_STR:   &str = "s64";
 pub(crate) const KW_TYPE_CHAR_STR:     &str = "char";
 pub(crate) const KW_TYPE_STRING_STR:   &str = "string";
 pub(crate) const KW_TYPE_INFERRED_STR: &str = "auto";
 // Byte-string views of the type names above.
 pub(crate) const KW_TYPE_IN_PORT:  &[u8] = KW_TYPE_IN_PORT_STR.as_bytes();
 pub(crate) const KW_TYPE_OUT_PORT: &[u8] = KW_TYPE_OUT_PORT_STR.as_bytes();
 pub(crate) const KW_TYPE_MESSAGE:  &[u8] = KW_TYPE_MESSAGE_STR.as_bytes();
 pub(crate) const KW_TYPE_BOOL:     &[u8] = KW_TYPE_BOOL_STR.as_bytes();
 pub(crate) const KW_TYPE_UINT8:    &[u8] = KW_TYPE_UINT8_STR.as_bytes();
 pub(crate) const KW_TYPE_UINT16:   &[u8] = KW_TYPE_UINT16_STR.as_bytes();
 pub(crate) const KW_TYPE_UINT32:   &[u8] = KW_TYPE_UINT32_STR.as_bytes();
 pub(crate) const KW_TYPE_UINT64:   &[u8] = KW_TYPE_UINT64_STR.as_bytes();
 pub(crate) const KW_TYPE_SINT8:    &[u8] = KW_TYPE_SINT8_STR.as_bytes();
 pub(crate) const KW_TYPE_SINT16:   &[u8] = KW_TYPE_SINT16_STR.as_bytes();
 pub(crate) const KW_TYPE_SINT32:   &[u8] = KW_TYPE_SINT32_STR.as_bytes();
 pub(crate) const KW_TYPE_SINT64:   &[u8] = KW_TYPE_SINT64_STR.as_bytes();
 pub(crate) const KW_TYPE_CHAR:     &[u8] = KW_TYPE_CHAR_STR.as_bytes();
 pub(crate) const KW_TYPE_STRING:   &[u8] = KW_TYPE_STRING_STR.as_bytes();
 pub(crate) const KW_TYPE_INFERRED: &[u8] = KW_TYPE_INFERRED_STR.as_bytes();
 /// Abstraction over containers that accept pushed values, so that the
 /// comma-separated list parsers can fill either a `Vec` or a `ScopedSection`.
 /// Those parsers are only monomorphized for a few specific comma-separated
 /// cases, so no significant code bloat is expected. (The original author
 /// notes not being fond of this solution.)
 pub(crate) trait Extendable {
     type Value;
     fn push(&mut self, v: Self::Value);
 }
 impl<T> Extendable for Vec<T> {
     type Value = T;
     #[inline]
     fn push(&mut self, value: T) {
         // Path call: resolves to `Vec`'s inherent `push`, not to this trait
         // method, so there is no recursion.
         Vec::push(self, value);
     }
 }
 /// Allows a `ScopedSection` to be the target of the comma-separated list
 /// parsers: pushed values are handed to the section's `push`.
 impl<T: Sized> Extendable for ScopedSection<T> {
     type Value = T;
     #[inline]
     fn push(&mut self, v: Self::Value) {
         // NOTE(review): the explicit cast presumably steers method lookup to
         // an inherent `ScopedSection::push` rather than back into this trait
         // method — confirm against the `ScopedSection` definition.
         (self as &mut ScopedSection<T>).push(v);
+    }
+}
 /// Consumes a domain-name identifier: identifiers separated by a dot. For
 /// simplification of later parsing and span identification the domain-name may
 /// contain whitespace, but must reside on the same line.
 pub(crate) fn consume_domain_ident<'a>(
     source: &'a InputSource, iter: &mut TokenIter
 ) -> Result<(&'a [u8], InputSpan), ParseError> {
     let (_, mut span) = consume_ident(source, iter)?;
     while let Some(TokenKind::Dot) = iter.next() {
         iter.consume();
         let (_, new_span) = consume_ident(source, iter)?;
         span.end = new_span.end;
+    }
     // Not strictly necessary, but probably a reasonable restriction: this
     // simplifies parsing of module naming and imports.
     if span.begin.line != span.end.line {
         return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines"));
+    }
     // If module name consists of a single identifier, then it may not match any
     // of the reserved keywords
     let section = source.section_at_pos(span.begin, span.end);
     if is_reserved_keyword(section) {
         return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
+    }
     Ok((source.section_at_pos(span.begin, span.end), span))
+}
 /// Consumes a specific expected token. Be careful to only call this with tokens
 /// that do not have a variable length.
 pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<InputSpan, ParseError> {
     if Some(expected) != iter.next() {
         return Err(ParseError::new_error_at_pos(
             source, iter.last_valid_pos(),
             format!("expected '{}'", expected.token_chars())
         ));
+    }
     let span = iter.next_span();
     iter.consume();
     Ok(span)
+}
 /// Parses comma-separated items up to and including `close_delim`. Each item
 /// is parsed by `consumer_fn` and pushed onto `target`. A trailing comma in
 /// front of the closing delimiter is accepted. If `close_pos` is provided it
 /// receives the position of the closing delimiter.
 pub(crate) fn consume_comma_separated_until<T, F, E>(
     close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
     mut consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
     close_pos: Option<&mut InputPosition>
 ) -> Result<(), ParseError>
     where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
           E: Extendable<Value=T>
 {
     // Starts out `true` so that the very first item needs no leading comma.
     let mut separator_seen = true;
     loop {
         let upcoming = iter.next();
         if upcoming == Some(close_delim) {
             if let Some(close_pos) = close_pos {
                 // Caller asked for the position of the closing delimiter
                 let (_, delim_pos) = iter.next_positions();
                 *close_pos = delim_pos;
             }
             iter.consume();
             return Ok(());
         }
         // Either two items without a comma in between, or no tokens left.
         if upcoming.is_none() || !separator_seen {
             return Err(ParseError::new_error_at_pos(
                 source, iter.last_valid_pos(),
                 format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
             ));
         }
         target.push(consumer_fn(source, iter, ctx)?);
         separator_seen = iter.next() == Some(TokenKind::Comma);
         if separator_seen {
             iter.consume();
         }
     }
 }
 /// Parses a delimited, comma-separated list, but only when the next token is
 /// `open_delim`; otherwise the iterator is left untouched. Outcomes:
 /// - `Ok(false)`: no opening delimiter was found.
 /// - `Ok(true)`: opening and closing delimiter found, with zero or more
 ///   items in between (pushed onto `target`).
 /// - `Err(..)`: an opening delimiter was found, but parsing the list failed.
 pub(crate) fn maybe_consume_comma_separated<T, F, E>(
     open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
     consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
     close_pos: Option<&mut InputPosition>
 ) -> Result<bool, ParseError>
     where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
           E: Extendable<Value=T>
 {
     if iter.next() == Some(open_delim) {
         // Committed to a list now: everything up to the closing delimiter
         // has to parse.
         iter.consume();
         consume_comma_separated_until(
             close_delim, source, iter, ctx, consumer_fn, target, item_name_and_article, close_pos
         ).map(|_| true)
     } else {
         Ok(false)
     }
 }
 pub(crate) fn maybe_consume_comma_separated_spilled<F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<(), ParseError>>(
     open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
     iter: &mut TokenIter, ctx: &mut PassCtx,
     mut consumer_fn: F, item_name_and_article: &'static str
 ) -> Result<bool, ParseError> {
     let mut next = iter.next();
     if Some(open_delim) != next {
         return Ok(false);
+    }
     iter.consume();
     let mut had_comma = true;
     loop {
         next = iter.next();
         if Some(close_delim) == next {
             iter.consume();
             break;
         } else if !had_comma {
             return Err(ParseError::new_error_at_pos(
                 source, iter.last_valid_pos(),
                 format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
             ));
+        }
         consumer_fn(source, iter, ctx)?;
         next = iter.next();
         had_comma = next == Some(TokenKind::Comma);
         if had_comma {
             iter.consume();
+        }
+    }
     Ok(true)
+}
 /// Consumes a comma-separated list and expects both the opening and the
 /// closing delimiter to be present. The list itself may still be empty. When
 /// the opening delimiter is missing, an "expected <list_name_and_article>"
 /// error is reported at the position recorded before parsing started.
 pub(crate) fn consume_comma_separated<T, F, E>(
     open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
     iter: &mut TokenIter, ctx: &mut PassCtx,
     consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
     list_name_and_article: &'static str, close_pos: Option<&mut InputPosition>
 ) -> Result<(), ParseError>
     where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
           E: Extendable<Value=T>
 {
     let start_pos = iter.last_valid_pos();
     let found_list = maybe_consume_comma_separated(
         open_delim, close_delim, source, iter, ctx, consumer_fn, target,
         item_name_and_article, close_pos
     )?;
     if found_list {
         Ok(())
     } else {
         Err(ParseError::new_error_at_pos(
             source, start_pos,
             format!("expected {}", list_name_and_article)
         ))
     }
 }
 /// Consumes an integer literal, may be binary, octal, hexadecimal or decimal,
 /// and may have separating '_'-characters.
 /// TODO: @Cleanup, @Performance
 pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
     if Some(TokenKind::Integer) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
+    }
     let integer_span = iter.next_span();
     iter.consume();
     let integer_text = source.section_at_span(integer_span);
     // Determine radix and offset from prefix
     let (radix, input_offset, radix_name) =
         if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
             // Binary number
             (2, 2, "binary")
         } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
             // Octal number
             (8, 2, "octal")
         } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
             // Hexadecimal number
             (16, 2, "hexadecimal")
         } else {
             (10, 0, "decimal")
         };
     // Take out any of the separating '_' characters
     buffer.clear();
     for char_idx in input_offset..integer_text.len() {
         let char = integer_text[char_idx];
         if char == b'_' {
             continue;
+        }
         if !((char >= b'0' && char <= b'9') || (char >= b'A' && char <= b'F') || (char >= b'a' || char <= b'f')) {
             return Err(ParseError::new_error_at_span(
                 source, integer_span,
                 format!("incorrectly formatted {} number", radix_name)
             ));
+        }
         buffer.push(char::from(char));
+    }
     // Use the cleaned up string to convert to integer
     match u64::from_str_radix(&buffer, radix) {
         Ok(number) => Ok((number, integer_span)),
         Err(_) => Err(ParseError::new_error_at_span(
             source, integer_span,
             format!("incorrectly formatted {} number", radix_name)
         )),
+    }
+}
 /// Consumes a character literal. We currently support a limited number of
 /// backslash-escaped characters
 pub(crate) fn consume_character_literal(
     source: &InputSource, iter: &mut TokenIter
 ) -> Result<(char, InputSpan), ParseError> {
     if Some(TokenKind::Character) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal"));
+    }
     let span = iter.next_span();
     iter.consume();
     let char_text = source.section_at_span(span);
     if !char_text.is_ascii() {
         return Err(ParseError::new_error_str_at_span(
             source, span, "expected an ASCII character literal"
         ));
+    }
     match char_text.len() {
 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 => {
             // We already know the text is ascii, so just throw an error if we have the escape
             // character.
             if char_text[0] == b'\\' {
                 return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
+            }
             return Ok((char_text[0] as char, span));
         },
 => {
             if char_text[0] == b'\\' {
-                let result = parse_escaped_character(source, iter.last_valid_pos(), char_text[1])?;
+                let result = parse_escaped_character(source, span, char_text[1])?;
                 return Ok((result, span))
+            }
         },
         _ => {}
+    }
     return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
+}
 /// Consumes a string literal. We currently support a limited number of
 /// backslash-escaped characters. Note that the result is stored in the
 /// buffer.
 pub(crate) fn consume_string_literal(
     source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 ) -> Result<InputSpan, ParseError> {
     if Some(TokenKind::String) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
+    }
     buffer.clear();
     let span = iter.next_span();
     iter.consume();
     let text = source.section_at_span(span);
     if !text.is_ascii() {
         return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
+    }
     buffer.reserve(text.len());
     let mut was_escape = false;
     for idx in 0..text.len() {
         let cur = text[idx];
         if cur != b'\\' {
             if was_escape {
-                let to_push = parse_escaped_character(source, iter.last_valid_pos(), cur)?;
+                let to_push = parse_escaped_character(source, span, cur)?;
                 buffer.push(to_push);
             } else {
                 buffer.push(cur as char);
+            }
             was_escape = false;
         } else {
             was_escape = true;
+        }
+    }
     debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal
     Ok(span)
+}
-fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> Result<char, ParseError> {
+fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result<char, ParseError> {
     let result = match v {
         b'r' => '\r',
         b'n' => '\n',
         b't' => '\t',
         b'0' => '\0',
         b'\\' => '\\',
         b'\'' => '\'',
         b'"' => '"',
         v => return Err(ParseError::new_error_at_pos(
             source, pos, format!("unexpected escaped character '{}'", v)
         )),
         v => {
             let msg = if v.is_ascii_graphic() {
                 format!("unsupported escape character '{}'", v as char)
             } else {
                 format!("unsupported escape character with (unsigned) byte value {}", v)
             };
             return Err(ParseError::new_error_at_span(source, literal_span, msg))
         },
     };
     Ok(result)
+}
 pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
     if Some(TokenKind::Pragma) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
+    }
     let (pragma_start, pragma_end) = iter.next_positions();
     iter.consume();
     Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
+}
 pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
     peek_ident(source, iter).map_or(false, |section| section == expected)
+}
 /// Returns the text of the next token if it is an identifier, without
 /// consuming it; returns `None` for any other token.
 pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
     if iter.next() != Some(TokenKind::Ident) {
         return None;
     }
     let (begin, end) = iter.next_positions();
     Some(source.section_at_pos(begin, end))
 }
 /// Consumes any identifier and returns it together with its span. Does not
 /// check if the identifier is a reserved keyword.
 pub(crate) fn consume_any_ident<'a>(
     source: &'a InputSource, iter: &mut TokenIter
 ) -> Result<(&'a [u8], InputSpan), ParseError> {
     if Some(TokenKind::Ident) != iter.next() {
         return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
+    }
     let (ident_start, ident_end) = iter.next_positions();
     iter.consume();
     Ok((source.section_at_pos(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
+}
 /// Consumes a specific identifier. May or may not be a reserved keyword.
 pub(crate) fn consume_exact_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> Result<InputSpan, ParseError> {
     let (ident, pos) = consume_any_ident(source, iter)?;
     if ident != expected {
         debug_assert!(expected.is_ascii());
         return Err(ParseError::new_error_at_pos(
             source, iter.last_valid_pos(),
             format!("expected the text '{}'", &String::from_utf8_lossy(expected))
         ));
+    }
     Ok(pos)
+}
 /// Consumes an identifier that is not a reserved keyword and returns it
 /// together with its span.
 pub(crate) fn consume_ident<'a>(
     source: &'a InputSource, iter: &mut TokenIter
 ) -> Result<(&'a [u8], InputSpan), ParseError> {
     let (ident, span) = consume_any_ident(source, iter)?;
     if is_reserved_keyword(ident) {
         return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
+    }
     Ok((ident, span))
+}
 /// Consumes an identifier and immediately intern it into the `StringPool`
 pub(crate) fn consume_ident_interned(
     source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx
 ) -> Result<Identifier, ParseError> {
     let (value, span) = consume_ident(source, iter)?;
     let value = ctx.pool.intern(value);
     Ok(Identifier{ span, value })
+}
 /// Returns `true` if `text` is one of the keywords that introduce a
 /// definition: struct, enum, union, func, primitive or composite.
 fn is_reserved_definition_keyword(text: &[u8]) -> bool {
     matches!(
         text,
         KW_STRUCT | KW_ENUM | KW_UNION | KW_FUNCTION | KW_PRIMITIVE | KW_COMPOSITE
     )
 }
 /// Returns `true` if `text` is `import`, `as`, or one of the reserved
 /// statement keywords.
 // NOTE(review): `KW_STMT_ELSE` is declared alongside the other statement
 // keywords but is not part of this list — confirm whether that is intended.
 fn is_reserved_statement_keyword(text: &[u8]) -> bool {
     matches!(
         text,
         KW_IMPORT | KW_AS |
         KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
         KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
         KW_STMT_SYNC | KW_STMT_NEW
     )
 }
 fn is_reserved_expression_keyword(text: &[u8]) -> bool {
     match text {
         KW_LET | KW_CAST |
         KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
         KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH => true,
         _ => false,
+    }
+}
 /// Returns `true` if `text` is the name of a builtin type (ports, message,
 /// bool, the integer types, char, string) or the inferred-type keyword.
 fn is_reserved_type_keyword(text: &[u8]) -> bool {
     matches!(
         text,
         KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
         KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
         KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
         KW_TYPE_CHAR | KW_TYPE_STRING |
         KW_TYPE_INFERRED
     )
 }
 fn is_reserved_keyword(text: &[u8]) -> bool {
     return
         is_reserved_definition_keyword(text) ||
         is_reserved_statement_keyword(text) ||
         is_reserved_expression_keyword(text) ||
         is_reserved_type_keyword(text);
+}
 /// Finds the first module in `modules` whose `root_id` equals the given one,
 /// or `None` if there is no such module.
 pub(crate) fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
     modules.iter().find(|candidate| candidate.root_id == root_id)
 }
 /// Builds the error reported when a newly introduced symbol clashes with an
 /// already existing one. The error is placed at the new symbol; if the old
 /// symbol has a location in the source as well (i.e. it is not a builtin), an
 /// additional info entry points at it.
 // Note: strictly speaking `module_idx` is not needed, but passing it guards
 // against programmer mistakes during development: the conflict arises
 // because a particular module is currently being parsed.
 pub(crate) fn construct_symbol_conflict_error(
     modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol
 ) -> ParseError {
     let module = &modules[module_idx];
     // Produces a description of a symbol and, when available, the span at
     // which it was introduced.
     let describe_symbol = |symbol: &Symbol| -> (String, Option<InputSpan>) {
         match &symbol.variant {
             SymbolVariant::Module(module) => {
                 let import = &ctx.heap[module.introduced_at];
                 (
                     format!("the module aliased as '{}' imported here", symbol.name.as_str()),
                     Some(import.as_module().span)
                 )
             },
             SymbolVariant::Definition(definition) => {
                 if definition.defined_in_module.is_invalid() {
                     // Must be a builtin thing
                     return (format!("the builtin '{}'", symbol.name.as_str()), None);
                 }
                 if let Some(import_id) = definition.imported_at {
                     let import = &ctx.heap[import_id];
                     return (
                         format!("the type '{}' imported here", symbol.name.as_str()),
                         Some(import.as_symbols().span)
                     );
                 }
                 // Neither builtin nor imported: the symbol is defined in a
                 // module, so the conflict must stem from that definition.
                 debug_assert_eq!(definition.defined_in_module, module.root_id);
                 (
                     format!("the type '{}' defined here", symbol.name.as_str()),
                     Some(definition.identifier_span)
                 )
             },
         }
     };
     let (new_symbol_msg, new_symbol_span) = describe_symbol(new_symbol);
     let (old_symbol_msg, old_symbol_span) = describe_symbol(old_symbol);
     let new_symbol_span = new_symbol_span.unwrap(); // because new symbols cannot be builtin
     if let Some(old_symbol_span) = old_symbol_span {
         ParseError::new_error_at_span(
             &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
         ).with_info_at_span(
             &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
         )
     } else {
         ParseError::new_error_at_span(
             &module.source, new_symbol_span,
             format!("symbol is defined twice: {} conflicts with {}", new_symbol_msg, old_symbol_msg)
         )
     }
 }
@@ \ No newline at end of file @@

src/protocol/tests/mod.rs

➞

Show inline comments

 /**
  * protocol/tests/mod.rs
+ *
  * Contains tests for various parts of the lexer/parser and the evaluator of the
  * code. These are intended to be temporary tests such that we're sure that we
  * don't break existing functionality.
+ *
  * In the future these should be replaced by proper testing protocols.
+ *
  * If any of these tests fail, and you think they're not needed anymore, feel
  * free to cast them out into oblivion, where dead code goes to die.
  */
 mod utils;
 mod lexer;
 mod parser_validation;
 mod parser_inference;
 mod parser_monomorphs;
 mod parser_imports;
 mod parser_binding;
 mod parser_literals;
 mod eval_operators;
 mod eval_calls;
 mod eval_casting;
 mod eval_binding;
 mod eval_silly;
 pub(crate) use utils::{Tester}; // the testing harness
 pub(crate) use crate::protocol::eval::value::*; // to test functions
@@ \ No newline at end of file @@

src/protocol/tests/parser_literals.rs

➞

Show inline comments

@@ new file 100644 @@
 use super::*;
 // Exercises binary (`0b`-prefixed) integer literals: first a program with
 // valid literals, with and without `_` separators, then three malformed
 // literals that are each expected to produce an "incorrectly formatted binary
 // number" message.
 #[test]
 fn test_binary_literals() {
     Tester::new_single_source_expect_ok("valid", "
         func test() -> u32 {
             u8  v1 = 0b0100_0010;
             u16 v2 = 0b10101010;
             u32 v3 = 0b10000001_01111110;
             u64 v4 = 0b1001_0110_1001_0110;
             return 0b10110;
+        }
     ");
     // The literal contains a '2', which is not a binary digit.
     Tester::new_single_source_expect_err("invalid character", "
         func test() -> u32 {
             return 0b10011001_10012001;
+        }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
     // Only the prefix, without any digits.
     Tester::new_single_source_expect_err("no characters", "
         func test() -> u32 { return 0b; }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
     // The prefix followed by separators only, so still no digits.
     Tester::new_single_source_expect_err("only separators", "
         func test() -> u32 { return 0b____; }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
+}
 // Exercises string literals: valid literals (plain, escaped and empty),
 // two unterminated literals and a literal with an unsupported escape
 // sequence. Keep in mind that the tested programs are themselves Rust string
 // literals, so every quote and backslash below is escaped once more.
 #[test]
 fn test_string_literals() {
     // In the tested program `v2` is assigned the literal "\t\r\n\\" and `v3`
     // the empty literal "". All three variables must be typed as `string`.
     Tester::new_single_source_expect_ok("valid", "
         func test() -> string {
             auto v1 = \"Hello, world!\";
             auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing
             auto v3 = \"\";
             return \"No way, dude!\";
+        }
     ").for_function("test", |f| { f
         .for_variable("v1", |v| { v.assert_concrete_type("string"); })
         .for_variable("v2", |v| { v.assert_concrete_type("string"); })
         .for_variable("v3", |v| { v.assert_concrete_type("string"); });
     });
     // A double quote is opened but never closed. Exactly one error is
     // expected, with a message containing "unterminated".
     // NOTE(review): `assert_occurs_at` presumably checks that the error is
     // located at the given source text — confirm in the test utilities.
     Tester::new_single_source_expect_err("unterminated simple", "
         func test() -> string { return \"'; }
     ").error(|e| { e
         .assert_num(1)
         .assert_occurs_at(0, "\"")
         .assert_msg_has(0, "unterminated");
     });
     // The tested program contains `"\"`: the second quote is escaped by the
     // backslash, so the literal is never terminated.
     Tester::new_single_source_expect_err("unterminated with preceding escaped", "
         func test() -> string { return \"\\\"; }
     ").error(|e| { e
         .assert_num(1)
         .assert_occurs_at(0, "\"\\")
         .assert_msg_has(0, "unterminated");
     });
     // The tested program contains `"\y"`, and `\y` is not a supported escape
     // sequence.
     Tester::new_single_source_expect_err("invalid escaped character", "
         func test() -> string { return \"\\y\"; }
     ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); });
+}
@@ \ No newline at end of file @@

0 comments (0 inline, 0 general)