Changeset - 2693293dc537
mh <contact@maxhenger.nl> - 2022-03-29 16:43:55
Remove code that produced token ranges
4 files changed with 27 insertions and 252 deletions:
src/protocol/parser/mod.rs
 
@@ -15,379 +15,379 @@ pub(crate) mod pass_stack_size;
 

	
 
use tokens::*;
 
use crate::collections::*;
 
use visitor::Visitor;
 
use pass_tokenizer::PassTokenizer;
 
use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use pass_rewriting::PassRewriting;
 
use pass_stack_size::PassStackSize;
 
use symbol_table::*;
 
use type_table::*;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_writer::ASTWriter;
 
use crate::protocol::parser::type_table::PolymorphicVariable;
 
use crate::protocol::token_writer::TokenWriter;
 

	
 
const REOWOLF_PATH_ENV: &'static str = "REOWOLF_ROOT"; // first: look up the Reowolf path in this environment variable

const REOWOLF_PATH_DIR: &'static str = "std"; // then: try this folder in the current working directory
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Tokenized,              // source is tokenized
 
    SymbolsScanned,         // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    DefinitionsParsed,      // produced the AST for the entire module
 
    TypesAddedToTable,      // added all definitions to the type table
 
    ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
 
    Typed,                  // Type inference and checking has been performed
 
    Rewritten,              // Special AST nodes are rewritten into regular AST nodes
 
    // When we continue with the compiler:
 
    // StackSize
 
}
 

	
 
pub struct Module {
 
    pub source: InputSource,
 
    pub tokens: TokenBuffer,
 
    pub is_compiler_file: bool, // TODO: @Hack
 
    pub root_id: RootId,
 
    pub name: Option<(PragmaId, StringRef<'static>)>,
 
    pub version: Option<(PragmaId, i64)>,
 
    pub phase: ModuleCompilationPhase,
 
}
 

	
 
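/// Type IDs of the builtin types as registered in the type table. Every ID
/// starts out invalid (see `TargetArch::new`) and is assigned its real value
/// when `Parser::new` inserts the builtin types.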
pub struct TargetArch {
 
    pub void_type_id: TypeId,
 
    pub message_type_id: TypeId,
 
    pub bool_type_id: TypeId,
 
    pub uint8_type_id: TypeId,
 
    pub uint16_type_id: TypeId,
 
    pub uint32_type_id: TypeId,
 
    pub uint64_type_id: TypeId,
 
    pub sint8_type_id: TypeId,
 
    pub sint16_type_id: TypeId,
 
    pub sint32_type_id: TypeId,
 
    pub sint64_type_id: TypeId,
 
    pub char_type_id: TypeId,
 
    pub string_type_id: TypeId,
 
    pub array_type_id: TypeId,
 
    pub slice_type_id: TypeId,
 
    pub input_type_id: TypeId,
 
    pub output_type_id: TypeId,
 
    pub pointer_type_id: TypeId,
 
}
 

	
 
impl TargetArch {
 
    fn new() -> Self {
 
        return Self{
 
            void_type_id: TypeId::new_invalid(),
 
            bool_type_id: TypeId::new_invalid(),
 
            message_type_id: TypeId::new_invalid(),
 
            uint8_type_id: TypeId::new_invalid(),
 
            uint16_type_id: TypeId::new_invalid(),
 
            uint32_type_id: TypeId::new_invalid(),
 
            uint64_type_id: TypeId::new_invalid(),
 
            sint8_type_id: TypeId::new_invalid(),
 
            sint16_type_id: TypeId::new_invalid(),
 
            sint32_type_id: TypeId::new_invalid(),
 
            sint64_type_id: TypeId::new_invalid(),
 
            char_type_id: TypeId::new_invalid(),
 
            string_type_id: TypeId::new_invalid(),
 
            array_type_id: TypeId::new_invalid(),
 
            slice_type_id: TypeId::new_invalid(),
 
            input_type_id: TypeId::new_invalid(),
 
            output_type_id: TypeId::new_invalid(),
 
            pointer_type_id: TypeId::new_invalid(),
 
        }
 
    }
 
}
 

	
 
pub struct PassCtx<'a> {
 
    heap: &'a mut Heap,
 
    symbols: &'a mut SymbolTable,
 
    pool: &'a mut StringPool,
 
    arch: &'a TargetArch,
 
}
 

	
 
pub struct Parser {
 
    // Storage of all information created/gathered during compilation.
 
    pub(crate) heap: Heap,
 
    pub(crate) string_pool: StringPool, // Do not deallocate, holds all strings
 
    pub(crate) modules: Vec<Module>,
 
    pub(crate) symbol_table: SymbolTable,
 
    pub(crate) type_table: TypeTable,
 
    pub(crate) global_module_index: usize, // contains globals, implicitly imported everywhere
 
    // Compiler passes, used as little state machines that keep their memory
 
    // around.
 
    pass_tokenizer: PassTokenizer,
 
    pass_symbols: PassSymbols,
 
    pass_import: PassImport,
 
    pass_definitions: PassDefinitions,
 
    pass_validation: PassValidationLinking,
 
    pass_typing: PassTyping,
 
    pass_rewriting: PassRewriting,
 
    pass_stack_size: PassStackSize,
 
    // Compiler options
 
    pub write_tokens_to: Option<String>,
 
    pub write_ast_to: Option<String>,
 
    pub(crate) arch: TargetArch,
 
}
 

	
 
impl Parser {
 
    pub fn new() -> Result<Self, String> {
 
        let mut parser = Parser{
 
            heap: Heap::new(),
 
            string_pool: StringPool::new(),
 
            modules: Vec::new(),
 
            symbol_table: SymbolTable::new(),
 
            type_table: TypeTable::new(),
 
            global_module_index: 0,
 
            pass_tokenizer: PassTokenizer::new(),
 
            pass_symbols: PassSymbols::new(),
 
            pass_import: PassImport::new(),
 
            pass_definitions: PassDefinitions::new(),
 
            pass_validation: PassValidationLinking::new(),
 
            pass_typing: PassTyping::new(),
 
            pass_rewriting: PassRewriting::new(),
 
            pass_stack_size: PassStackSize::new(),
 
            write_tokens_to: None,
 
            write_ast_to: None,
 
            arch: TargetArch::new(),
 
        };
 

	
 
        parser.symbol_table.insert_scope(None, SymbolScope::Global);
 

	
 
        // Insert builtin types
 
        // TODO: At some point use correct values for size/alignment
 
        parser.arch.void_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Void], false, 0, 1);
 
        parser.arch.message_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Message], false, 24, 8);
 
        parser.arch.bool_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Bool], false, 1, 1);
 
        parser.arch.uint8_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt8], false, 1, 1);
 
        parser.arch.uint16_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt16], false, 2, 2);
 
        parser.arch.uint32_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt32], false, 4, 4);
 
        parser.arch.uint64_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt64], false, 8, 8);
 
        parser.arch.sint8_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt8], false, 1, 1);
 
        parser.arch.sint16_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt16], false, 2, 2);
 
        parser.arch.sint32_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt32], false, 4, 4);
 
        parser.arch.sint64_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt64], false, 8, 8);
 
        parser.arch.char_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Character], false, 4, 4);
 
        parser.arch.string_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::String], false, 24, 8);
 
        parser.arch.array_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Array, ConcreteTypePart::Void], true, 24, 8);
 
        parser.arch.slice_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Slice, ConcreteTypePart::Void], true, 16, 4);
 
        parser.arch.input_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Input, ConcreteTypePart::Void], true, 8, 8);
 
        parser.arch.output_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8);
 
        parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8);
 

	
 
        // Parse standard library
 
        parser.feed_standard_library()?;
 

	
 
        return Ok(parser)
 
    }
 

	
 
    /// Feeds a new InputSource to the parser, which will tokenize it and store
 
    /// it internally for later parsing (when all modules are present). Returns
 
    /// the index of the new module.
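    ///
    /// A usage sketch (illustrative; the module source here is hypothetical):
    ///
    /// ```ignore
    /// let mut parser = Parser::new().expect("standard library found");
    /// let source = InputSource::new("main.pdl".to_string(), b"primitive main() {}".to_vec());
    /// let module_index = parser.feed(source).expect("tokenization succeeds");
    /// parser.parse().expect("compilation succeeds");
    /// ```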
 
    pub fn feed(&mut self, source: InputSource) -> Result<usize, ParseError> {
 
        return self.feed_internal(source, false);
 
    }
 

	
 
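    /// Runs the remaining compilation passes over all modules fed so far, in
    /// dependency order: symbol scanning, import resolution, definition
    /// parsing, base type construction, validation/linking, type inference
    /// (driven by the `ResolveQueue`), rewriting, and stack-size computation.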
    pub fn parse(&mut self) -> Result<(), ParseError> {
 
        let mut pass_ctx = PassCtx{
 
            heap: &mut self.heap,
 
            symbols: &mut self.symbol_table,
 
            pool: &mut self.string_pool,
 
            arch: &self.arch,
 
        };
 

	
 
 

	
 
        // Advance all modules to the phase where all symbols are scanned
 
        for module_idx in 0..self.modules.len() {
 
            self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
        }
 

	
 
        // With all symbols scanned, perform further compilation until we can
 
        // add all base types to the type table.
 
        for module_idx in 0..self.modules.len() {
 
            self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
            self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
        }
 

	
 
        if let Some(filename) = &self.write_tokens_to {
 
            let mut writer = TokenWriter::new();
 
            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
 
            writer.write(&mut file, &self.modules);
 
        }
 

	
 
        // Add every known type to the type table
 
        self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?;
 

	
 
        // Continue compilation with the remaining phases now that the types
 
        // are all in the type table
 
        for module_idx in 0..self.modules.len() {
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_validation.visit_module(&mut ctx)?;
 
        }
 

	
 
        // Perform typechecking on all modules
 
        let mut queue = ResolveQueue::new();
 
        for module_idx in 0..self.modules.len() {
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_typing.queue_module_definitions(&mut ctx, &mut queue);
 
        }
 
        while !queue.is_empty() {
 
            let top = queue.pop_front().unwrap();
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx: top.root_id.index as usize,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?;
 
        }
 

	
 
        // Rewrite nodes in tree, then prepare for execution of code
 
        for module_idx in 0..self.modules.len() {
 
            self.modules[module_idx].phase = ModuleCompilationPhase::Typed;
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                modules: &mut self.modules,
 
                module_idx,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
                arch: &self.arch,
 
            };
 
            self.pass_rewriting.visit_module(&mut ctx)?;
 
            self.pass_stack_size.visit_module(&mut ctx)?;
 
        }
 

	
 
        // Write out desired information
 
        if let Some(filename) = &self.write_ast_to {
 
            let mut writer = ASTWriter::new();
 
            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
 
            writer.write_ast(&mut file, &self.heap);
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    /// Tries to find the standard library and add the files for parsing.
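    ///
    /// The lookup order is: the directory named by the `REOWOLF_ROOT`
    /// environment variable first, then the `./std` directory relative to
    /// the current working directory (see `REOWOLF_PATH_ENV` and
    /// `REOWOLF_PATH_DIR` above).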
 
    fn feed_standard_library(&mut self) -> Result<(), String> {
 
        use std::env;
 
        use std::path::{Path, PathBuf};
 
        use std::fs;
 

	
 
        const FILES: [&'static str; 1] = [
 
            "std.global.pdl",
 
        ];
 

	
 
        // Determine base directory
 
        let (base_path, from_env) = if let Ok(path) = env::var(REOWOLF_PATH_ENV) {
 
            // Path variable is set
 
            (path, true)
 
        } else {
 
            let mut path = String::with_capacity(REOWOLF_PATH_DIR.len() + 2);
 
            path.push_str("./");
 
            path.push_str(REOWOLF_PATH_DIR);
 
            (path, false)
 
        };
 

	
 
        // Make sure directory exists
 
        let path = Path::new(&base_path);
 
        if !path.exists() {
 
            return Err(format!("std lib root directory '{}' does not exist", base_path));
 
        }
 

	
 
        // Try to load all standard library files. We might need a more unified
 
        // way to do this in the future (e.g. a "std" package containing all
 
        // of the modules)
 
        let mut file_path = PathBuf::new();
 
        let mut first_file = true;
 

	
 
        for file in FILES {
 
            file_path.clear(); // otherwise `push` appends to the previous iteration's path

            file_path.push(path);
 
            file_path.push(file);
 

	
 
            let source = fs::read(file_path.as_path());
 
            if let Err(err) = source {
 
                return Err(format!(
 
                    "failed to read std lib file '{}' in root directory '{}', because: {}",
 
                    file, base_path, err
 
                ));
 
            }
 

	
 
            let source = source.unwrap();
 
            let input_source = InputSource::new(file.to_string(), source);
 

	
 
            let module_index = self.feed_internal(input_source, true);
 
            if let Err(err) = module_index {
 
                // A bit of a hack, but shouldn't really happen anyway: the
 
                // compiler should ship with a decent standard library (at some
 
                // point)
 
                return Err(format!("{}", err));
 
            }
 
            let module_index = module_index.unwrap();
 

	
 
            if first_file {
 
                self.global_module_index = module_index;
 
                first_file = false;
 
            }
 
        }
 

	
 
        return Ok(())
 
    }
 

	
 
    fn feed_internal(&mut self, mut source: InputSource, is_compiler_file: bool) -> Result<usize, ParseError> {
 
        let mut token_buffer = TokenBuffer::new();
 
        self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?;
 

	
 
        let module = Module{
 
            source,
 
            tokens: token_buffer,
 
            is_compiler_file,
 
            root_id: RootId::new_invalid(),
 
            name: None,
 
            version: None,
 
            phase: ModuleCompilationPhase::Tokenized,
 
        };
 
        let module_index = self.modules.len();
 
        self.modules.push(module);
 

	
 
        return Ok(module_index);
 
    }
 
}
 

	
 
fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec<ConcreteTypePart>, has_poly_var: bool, size: usize, alignment: usize) -> TypeId {
 
    const POLY_VARS: [PolymorphicVariable; 1] = [PolymorphicVariable{
 
        identifier: Identifier::new_empty(InputSpan::new()),
 
        is_in_use: false,
 
    }];
 

	
 
    let concrete_type = ConcreteType{ parts };
 
    let poly_var = if has_poly_var {
 
        POLY_VARS.as_slice()
 
    } else {
 
        &[]
 
    };
 

	
 
    return type_table.add_builtin_data_type(concrete_type, poly_var, size, alignment);
 
}
 
\ No newline at end of file
src/protocol/parser/pass_tokenizer.rs
 
use crate::protocol::input_source::{
 
    InputSource as InputSource,
 
    ParseError,
 
    InputPosition as InputPosition,
 
};
 

	
 
use super::tokens::*;
 
use super::token_parsing::*;
 

	
 
/// Tokenizer is a reusable parser to tokenize multiple source files using the
 
/// same allocated buffers. In a well-formed program, we produce a consistent
 
/// tree of token ranges such that we may identify tokens that represent a
 
/// definition or an import before producing the entire AST.
 
///
 
/// If the program is not well-formed then the tree may be inconsistent, but we
 
/// will detect this once we transform the tokens into the AST. To ensure a
 
/// consistent AST-producing phase we will require the input to have balanced

/// curly braces.
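///
/// As an illustration, tokenizing
///
/// ```text
/// import std.global;
/// primitive main() { /* ... */ }
/// ```
///
/// yields a root Module range with an Import child (closed at the `;`) and a
/// Definition child (closed at the matching `}`); any tokens in between end
/// up in Code ranges.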
 
pub(crate) struct PassTokenizer {
 
    // Stack of input positions of opening curly braces, used to detect
 
    // unmatched opening braces, unmatched closing braces are detected
 
    // immediately.
 
    curly_stack: Vec<InputPosition>,
 
    // Points to an element in the `TokenBuffer.ranges` variable.
 
    stack_idx: usize,
 
}
 

	
 
impl PassTokenizer {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            curly_stack: Vec::with_capacity(32),
 
            stack_idx: 0
 
        }
 
    }
 

	
 
    pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        // Assert source and buffer are at start
 
        debug_assert_eq!(source.pos().offset, 0);
 
        debug_assert!(target.tokens.is_empty());
 
        debug_assert!(target.ranges.is_empty());
 

	
 
        // Set up for tokenization by pushing the first range onto the stack.
 
        // This range may get transformed into the appropriate range kind later,
 
        // see `push_range` and `pop_range`.
 
        self.stack_idx = 0;
 
        target.ranges.push(TokenRange{
 
            parent_idx: NO_RELATION,
 
            range_kind: TokenRangeKind::Module,
 
            curly_depth: 0,
 
            start: 0,
 
            end: 0,
 
            num_child_ranges: 0,
 
            first_child_idx: NO_RELATION,
 
            last_child_idx: NO_RELATION,
 
            next_sibling_idx: NO_RELATION,
 
        });
 

	
 
        // Main tokenization loop
 
        while let Some(c) = source.next() {
 
            let token_index = target.tokens.len() as u32;
 

	
 
            if is_char_literal_start(c) {
 
                self.consume_char_literal(source, target)?;
 
            } else if is_string_literal_start(c) {
 
                self.consume_string_literal(source, target)?;
 
            } else if is_identifier_start(c) {
 
                let ident = self.consume_identifier(source, target)?;
 

	
 
                if demarks_symbol(ident) {
 
                    self.emit_marker(target, TokenMarkerKind::Definition, token_index);
 
                    self.push_range(target, TokenRangeKind::Definition, token_index);
 
                } else if demarks_import(ident) {
 
                    self.emit_marker(target, TokenMarkerKind::Import, token_index);
 
                    self.push_range(target, TokenRangeKind::Import, token_index);
 
                }
 
            } else if is_integer_literal_start(c) {
 
                self.consume_number(source, target)?;
 
            } else if is_pragma_start_or_pound(c) {
 
                let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
 
                if was_pragma {
 
                    self.emit_marker(target, TokenMarkerKind::Pragma, token_index);
 
                    self.push_range(target, TokenRangeKind::Pragma, token_index);
 
                }
 
            } else if self.is_line_comment_start(c, source) {
 
                self.consume_line_comment(source, target)?;
 
            } else if self.is_block_comment_start(c, source) {
 
                self.consume_block_comment(source, target)?;
 
            } else if is_whitespace(c) {
 
                self.consume_whitespace(source);
 
                let range = &target.ranges[self.stack_idx];
 
                if range.range_kind == TokenRangeKind::Pragma {
 
                    self.pop_range(target, target.tokens.len() as u32);
 
                }
 
            } else {
 
                let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
 
                if let Some((token, token_pos)) = was_punctuation {
 
                    if token == TokenKind::OpenCurly {
 
                        self.curly_stack.push(token_pos);
 
                    } else if token == TokenKind::CloseCurly {
 
                        // Check if this marks the end of a range we're
 
                        // currently processing
 
                        if self.curly_stack.is_empty() {
 
                            return Err(ParseError::new_error_str_at_pos(
 
                                source, token_pos, "unmatched closing curly brace '}'"
 
                            ));
 
                        }
 

	
 
                        self.curly_stack.pop();
 

	
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 

	
 
 
                    } else if token == TokenKind::SemiColon {
 
                        // Check if this marks the end of an import
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Import {
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 
                    }
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        source, source.pos(), "unexpected character"
 
                    ));
 
                }
 
            }
 
        }
 

	
 
        // End of file, check if our state is correct
 
        if let Some(error) = source.had_error.take() {
 
            return Err(error);
 
        }
 

	
 
        if !self.curly_stack.is_empty() {
 
            // Let's not add a lot of heuristics and just tell the programmer
 
            // that something is wrong
 
            let last_unmatched_open = self.curly_stack.pop().unwrap();
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, last_unmatched_open, "unmatched opening curly brace '{'"
 
            ));
 
        }
 

	
 
        // Ranges that did not depend on curly braces may have missing tokens.
 
        // So close all of the active ranges.
 
        while self.stack_idx != 0 {
 
            self.pop_range(target, target.tokens.len() as u32);
 
        }
 

	
 
        // And finally, we may have tokens at the end that do not yet belong

        // to a range, so insert a "code" range if this is the case.
 
        debug_assert_eq!(self.stack_idx, 0);
 
        let last_registered_idx = target.ranges[0].end;
 
        let last_token_idx = target.tokens.len() as u32;
 
        if last_registered_idx != last_token_idx {
 
            self.add_code_range(target, 0, last_registered_idx, last_token_idx, NO_RELATION);
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn is_line_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
 
        return first_char == b'/' && Some(b'/') == source.lookahead(1);
 
    }
 

	
 
    fn is_block_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
 
        return first_char == b'/' && Some(b'*') == source.lookahead(1);
 
    }
 

	
 
    fn maybe_parse_punctuation(
 
        &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
 
    ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
 
        debug_assert!(first_char != b'#', "'#' needs special handling");
 
        debug_assert!(first_char != b'\'', "'\'' needs special handling");
 
        debug_assert!(first_char != b'"', "'\"' needs special handling");
 

	
 
        let pos = source.pos();
 
        let token_kind;
 
        if first_char == b'!' {
 
            source.consume();
 
            if Some(b'=') == source.next() {
 
                source.consume();
 
                token_kind = TokenKind::NotEqual;
 
            } else {
 
                token_kind = TokenKind::Exclamation;
 
            }
 
        } else if first_char == b'%' {
 
            source.consume();
 
            if Some(b'=') == source.next() {
 
                source.consume();
 
                token_kind = TokenKind::PercentEquals;
 
            } else {
 
                token_kind = TokenKind::Percent;
 
            }
 
        } else if first_char == b'&' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'&') == next {
 
                source.consume();
 
                token_kind = TokenKind::AndAnd;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::AndEquals;
 
            } else {
 
                token_kind = TokenKind::And;
 
            }
 
        } else if first_char == b'(' {
 
            source.consume();
 
            token_kind = TokenKind::OpenParen;
 
        } else if first_char == b')' {
 
            source.consume();
 
            token_kind = TokenKind::CloseParen;
 
        } else if first_char == b'*' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::StarEquals;
 
            } else {
 
                token_kind = TokenKind::Star;
 
            }
 
        } else if first_char == b'+' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'+') == next {
 
                source.consume();
 
                token_kind = TokenKind::PlusPlus;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::PlusEquals;
 
            } else {
 
                token_kind = TokenKind::Plus;
 
            }
 
        } else if first_char == b',' {
 
            source.consume();
 
            token_kind = TokenKind::Comma;
 
        } else if first_char == b'-' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'-') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusMinus;
 
            } else if Some(b'>') == next {
 
                source.consume();
 
                token_kind = TokenKind::ArrowRight;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusEquals;
 
            } else {
 
                token_kind = TokenKind::Minus;
 
            }
 
        } else if first_char == b'.' {
 
            source.consume();
 
            if let Some(b'.') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::DotDot;
 
            } else {
 
                token_kind = TokenKind::Dot
 
            }
 
        } else if first_char == b'/' {
 
            source.consume();
 
            debug_assert_ne!(Some(b'/'), source.next());
 
            debug_assert_ne!(Some(b'*'), source.next());
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::SlashEquals;
 
            } else {
 
                token_kind = TokenKind::Slash;
 
            }
 
        } else if first_char == b':' {
 
            source.consume();
 
            if let Some(b':') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::ColonColon;
 
            } else {
 
                token_kind = TokenKind::Colon;
 
            }
 
        } else if first_char == b';' {
 
            source.consume();
 
            token_kind = TokenKind::SemiColon;
 
        } else if first_char == b'<' {
 
            source.consume();
 
            let next = source.next();
 
            if let Some(b'<') = next {
 
                source.consume();
 
                if let Some(b'=') = source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftLeftEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftLeft;
 
                }
 
            } else if let Some(b'=') = next {
 
                source.consume();
 
                token_kind = TokenKind::LessEquals;
 
            } else {
 
                token_kind = TokenKind::OpenAngle;
 
            }
 
        } else if first_char == b'=' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::EqualEqual;
 
            } else {
 
                token_kind = TokenKind::Equal;
 
            }
 
        } else if first_char == b'>' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'>') == next {
 
                source.consume();
 
                if Some(b'=') == source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftRightEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftRight;
 
                }
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::GreaterEquals;
 
            } else {
 
                token_kind = TokenKind::CloseAngle;
 
            }
 
        } else if first_char == b'?' {
 
            source.consume();
 
            token_kind = TokenKind::Question;
 
        } else if first_char == b'@' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::AtEquals;
 
            } else {
 
                token_kind = TokenKind::At;
 
            }
 
        } else if first_char == b'[' {
 
            source.consume();
 
            token_kind = TokenKind::OpenSquare;
 
        } else if first_char == b']' {
 
            source.consume();
 
            token_kind = TokenKind::CloseSquare;
 
        } else if first_char == b'^' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::CaretEquals;
 
            } else {
 
                token_kind = TokenKind::Caret;
 
            }
 
        } else if first_char == b'{' {
 
            source.consume();
 
            token_kind = TokenKind::OpenCurly;
 
        } else if first_char == b'|' {
 
@@ -435,370 +379,277 @@ impl PassTokenizer {
 
            if prev_char == b'\\' && c == b'\\' {
 
                // Escaped backslash, set prev_char to bogus to not conflict
 
                // with escaped-" and unterminated string literal detection.
 
                prev_char = b'\0';
 
            } else {
 
                prev_char = c;
 
            }
 
        }
 

	
 
        if prev_char != b'"' {
 
            // Unterminated string literal
 
            return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::String, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
 
        let start_pos = source.pos();
 
        debug_assert_eq!(first_char, b'#');
 
        source.consume();
 

	
 
        let next = source.next();
 
        if next.is_none() || !is_identifier_start(next.unwrap()) {
 
            // Just a pound sign
 
            target.tokens.push(Token::new(TokenKind::Pound, start_pos));
 
            Ok(false)
 
        } else {
 
            // Pound sign followed by identifier
 
            source.consume();
 
            while let Some(c) = source.next() {
 
                if !is_identifier_remaining(c) {
 
                    break;
 
                }
 
                source.consume();
 
            }
 

	
 
            self.check_ascii(source)?;
 

	
 
            let end_pos = source.pos();
 
            target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
 
            target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
            Ok(true)
 
        }
 
    }
 

	
 
    fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "//"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
 
        source.consume();
 
        source.consume();
 

	
 
        let mut prev_char = b'/';
 
        let mut cur_char = b'/';
 
        while let Some(c) = source.next() {
 
            prev_char = cur_char;
 
            cur_char = c;
 

	
 
            if c == b'\n' {
 
                // End of line, note that the newline is not consumed
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 

	
 
        let mut end_pos = source.pos();
 
        debug_assert_eq!(begin_pos.line, end_pos.line);
 

	
 
        // Modify offset to not include the newline characters
 
        if cur_char == b'\n' {
 
            if prev_char == b'\r' {
 
                end_pos.offset -= 1;
 
            }
 

	
 
            // Consume final newline
 
            source.consume();
 
        } else {
 
            // End of comment was due to EOF
 
            debug_assert!(source.next().is_none())
 
        }
 

	
 
        target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "/*"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
 
        source.consume();
 
        source.consume();
 

	
 
        // Explicitly do not initialize prev_char to '*', because then "/*/"

        // would be parsed as a valid, closed block comment.
 
        let mut prev_char = b' ';
 
        let mut is_closed = false;
 
        while let Some(c) = source.next() {
 
            source.consume();
 
            if prev_char == b'*' && c == b'/' {
 
                // End of block comment
 
                is_closed = true;
 
                break;
 
            }
 
            prev_char = c;
 
        }
 

	
 
        if !is_closed {
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, source.pos(), "encountered unterminated block comment")
 
            );
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_identifier_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until no more identifier
 
        while let Some(c) = source.next() {
 
            if !is_identifier_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
        Ok(source.section_at_pos(begin_pos, end_pos))
 
    }
 

	
 
    fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_integer_literal_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until it doesn't look like a number anymore
 
        while let Some(c) = source.next() {
 
            if !maybe_number_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    // Consumes whitespace and returns whether or not the whitespace contained
 
    // a newline.
 
    fn consume_whitespace(&self, source: &mut InputSource) -> bool {
 
        debug_assert!(is_whitespace(source.next().unwrap()));
 

	
 
        let mut has_newline = false;
 
        while let Some(c) = source.next() {
 
            if !is_whitespace(c) {
 
                break;
 
            }
 

	
 
            if c == b'\n' {
 
                has_newline = true;
 
            }
 
            source.consume();
 
        }
 

	
 
        has_newline
 
    }
 

	
 
    fn add_code_range(
 
        &mut self, target: &mut TokenBuffer, parent_idx: i32,
 
        code_start_idx: u32, code_end_idx: u32, next_sibling_idx: i32
 
    ) {
 
        let new_range_idx = target.ranges.len() as i32;
 
        let parent_range = &mut target.ranges[parent_idx as usize];
 
        debug_assert_ne!(parent_range.end, code_end_idx, "called add_code_range without a need to do so");
 

	
 
        let sibling_idx = parent_range.last_child_idx;
 

	
 
        parent_range.last_child_idx = new_range_idx;
 
        parent_range.end = code_end_idx;
 
        parent_range.num_child_ranges += 1;
 

	
 
        let curly_depth = self.curly_stack.len() as u32;
 
        target.ranges.push(TokenRange{
 
            parent_idx,
 
            range_kind: TokenRangeKind::Code,
 
            curly_depth,
 
            start: code_start_idx,
 
            end: code_end_idx,
 
            num_child_ranges: 0,
 
            first_child_idx: NO_RELATION,
 
            last_child_idx: NO_RELATION,
 
            next_sibling_idx,
 
        });
 

	
 
        // Fix up the sibling indices
 
        if sibling_idx != NO_RELATION {
 
            let sibling_range = &mut target.ranges[sibling_idx as usize];
 
            sibling_range.next_sibling_idx = new_range_idx;
 
        }
 
    }
 

	
 
    fn emit_marker(&mut self, target: &mut TokenBuffer, kind: TokenMarkerKind, first_token: u32) {
 
        debug_assert!(
 
            target.markers
 
                .last().map(|v| v.first_token < first_token)
 
                .unwrap_or(true)
 
        );
 

	
 
        target.markers.push(TokenMarker{
 
            kind,
 
            curly_depth: self.curly_stack.len() as u32,
 
            first_token,
 
            last_token: u32::MAX,
 
            handled: false,
 
        });
 
    }
 

	
 
    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
 
        let new_range_idx = target.ranges.len() as i32;
 
        let parent_idx = self.stack_idx as i32;
 
        let parent_range = &mut target.ranges[self.stack_idx];
 

	
 
        if parent_range.first_child_idx == NO_RELATION {
 
            parent_range.first_child_idx = new_range_idx;
 
        }
 

	
 
        let last_registered_idx = parent_range.end;
 
        if last_registered_idx != first_token_idx {
 
            self.add_code_range(target, parent_idx, last_registered_idx, first_token_idx, new_range_idx + 1);
 
        }
 

	
 
        // Push the new range
 
        self.stack_idx = target.ranges.len();
 
        let curly_depth = self.curly_stack.len() as u32;
 
        target.ranges.push(TokenRange{
 
            parent_idx,
 
            range_kind,
 
            curly_depth,
 
            start: first_token_idx,
 
            end: first_token_idx, // modified when popped
 
            num_child_ranges: 0,
 
            first_child_idx: NO_RELATION,
 
            last_child_idx: NO_RELATION,
 
            next_sibling_idx: NO_RELATION
 
        })
 
    }
 

	
 
    fn pop_range(&mut self, target: &mut TokenBuffer, end_token_idx: u32) {
 
        let popped_idx = self.stack_idx as i32;
 
        let popped_range = &mut target.ranges[self.stack_idx];
 
        debug_assert!(self.stack_idx != 0, "attempting to pop top-level range");
 

	
 
        // Fix up the current range before going back to parent
 
        popped_range.end = end_token_idx;
 
        debug_assert_ne!(popped_range.start, end_token_idx);
 

	
 
        // Go back to parent and fix up its child pointers, but remember the
 
        // last child, so we can link it to the newly popped range.
 
        self.stack_idx = popped_range.parent_idx as usize;
 
        let parent = &mut target.ranges[self.stack_idx];
 
        if parent.first_child_idx == NO_RELATION {
 
            parent.first_child_idx = popped_idx;
 
        }
 
        let prev_sibling_idx = parent.last_child_idx;
 
        parent.last_child_idx = popped_idx;
 
        parent.end = end_token_idx;
 
        parent.num_child_ranges += 1;
 

	
 
        // Fix up the sibling (if it exists)
 
        if prev_sibling_idx != NO_RELATION {
 
            let sibling = &mut target.ranges[prev_sibling_idx as usize];
 
            sibling.next_sibling_idx = popped_idx;
 
        }
 
    }
 

	
 

	
 
    fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> {
 
        match source.next() {
 
            Some(c) if !c.is_ascii() => {
 
                Err(ParseError::new_error_str_at_pos(source, source.pos(), "encountered a non-ASCII character"))
 
            },
 
            _else => {
 
                Ok(())
 
            },
 
        }
 
    }
 
}
 

	
 
// Helpers for characters
 
fn demarks_symbol(ident: &[u8]) -> bool {
 
    return
 
        ident == KW_STRUCT ||
 
            ident == KW_ENUM ||
 
            ident == KW_UNION ||
 
            ident == KW_FUNCTION ||
 
            ident == KW_PRIMITIVE ||
 
            ident == KW_COMPOSITE
 
}
 

	
 
fn demarks_import(ident: &[u8]) -> bool {
 
    return ident == KW_IMPORT;
 
}
 

	
 
fn is_whitespace(c: u8) -> bool {
 
    c.is_ascii_whitespace()
 
}
 

	
 
fn is_char_literal_start(c: u8) -> bool {
 
    return c == b'\'';
 
}
 

	
 
fn is_string_literal_start(c: u8) -> bool {
 
    return c == b'"';
 
}
 

	
 
fn is_pragma_start_or_pound(c: u8) -> bool {
 
    return c == b'#';
 
}
 

	
 
fn is_identifier_start(c: u8) -> bool {
 
    return
 
        (c >= b'a' && c <= b'z') ||
 
            (c >= b'A' && c <= b'Z') ||
 
            c == b'_'
 
}
 

	
 
fn is_identifier_remaining(c: u8) -> bool {
 
    return
 
        (c >= b'0' && c <= b'9') ||
 
            (c >= b'a' && c <= b'z') ||
 
            (c >= b'A' && c <= b'Z') ||
 
            c == b'_'
 
}
 

	
 
fn is_integer_literal_start(c: u8) -> bool {
 
    return c >= b'0' && c <= b'9';
 
}
 

	
 
fn maybe_number_remaining(c: u8) -> bool {
 
    // Note: the hex digit ranges below also cover the binary indicators 'b' and 'B'.
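    // Illustrative examples: "0x1F", "0b1010" and "1_000" are each consumed
    // as a single Integer token here; whether the characters actually form a
    // valid literal is presumably checked later, when the token is parsed.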
 
    return
 
        (c == b'o' || c == b'O' || c == b'x' || c == b'X') ||
 
            (c >= b'0' && c <= b'9') || (c >= b'A' && c <= b'F') || (c >= b'a' && c <= b'f') ||
 
            c == b'_';
 
}
src/protocol/parser/tokens.rs
 
@@ -4,369 +4,335 @@ use crate::protocol::input_source::{
 
};
 

	
 
/// Represents a particular kind of token. Some kinds of token have a

/// variable character length; such a token is always followed by a
 
/// `TokenKind::SpanEnd` token.
 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum TokenKind {
 
    // Variable-character tokens, followed by a SpanEnd token
 
    Ident,          // regular identifier
 
    Pragma,         // identifier with prefixed `#`, range includes `#`
 
    Integer,        // integer literal
 
    String,         // string literal, range includes `"`
 
    Character,      // character literal, range includes `'`
 
    LineComment,    // line comment, range includes leading `//`, but not newline
 
    BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
 
    // Punctuation (single character)
 
    Exclamation,    // !
 
    Question,       // ?
 
    Pound,          // #
 
    OpenAngle,      // <
 
    OpenCurly,      // {
 
    OpenParen,      // (
 
    OpenSquare,     // [
 
    CloseAngle,     // >
 
    CloseCurly,     // }
 
    CloseParen,     // )
 
    CloseSquare,    // ]
 
    Colon,          // :
 
    Comma,          // ,
 
    Dot,            // .
 
    SemiColon,      // ;
 
    // Operator-like (single character)
 
    At,             // @
 
    Plus,           // +
 
    Minus,          // -
 
    Star,           // *
 
    Slash,          // /
 
    Percent,        // %
 
    Caret,          // ^
 
    And,            // &
 
    Or,             // |
 
    Tilde,          // ~
 
    Equal,          // =
 
    // Punctuation (two characters)
 
    ColonColon,     // ::
 
    DotDot,         // ..
 
    ArrowRight,     // ->
 
    // Operator-like (two characters)
 
    AtEquals,       // @=
 
    PlusPlus,       // ++
 
    PlusEquals,     // +=
 
    MinusMinus,     // --
 
    MinusEquals,    // -=
 
    StarEquals,     // *=
 
    SlashEquals,    // /=
 
    PercentEquals,  // %=
 
    CaretEquals,    // ^=
 
    AndAnd,         // &&
 
    AndEquals,      // &=
 
    OrOr,           // ||
 
    OrEquals,       // |=
 
    EqualEqual,     // ==
 
    NotEqual,       // !=
 
    ShiftLeft,      // <<
 
    LessEquals,     // <=
 
    ShiftRight,     // >>
 
    GreaterEquals,  // >=
 
    // Operator-like (three characters)
 
    ShiftLeftEquals,// <<=
 
    ShiftRightEquals, // >>=
 
    // Special marker token to indicate end of variable-character tokens
 
    SpanEnd,
 
}
 

	
 
impl TokenKind {
 
    /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
 
    /// the case for tokens of variable length (e.g. an identifier).
 
    pub(crate) fn has_span_end(&self) -> bool {
 
        return *self <= TokenKind::BlockComment
 
    }
 

	
 
    /// Returns the number of characters associated with the token. May only be called on tokens
 
    /// that do not have a variable length.
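    ///
    /// Relies on the declaration order of `TokenKind`: single-character
    /// variants come first (up to `Equal`), then two-character variants
    /// (up to `GreaterEquals`), then the three-character `<<=` and `>>=`.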
 
    fn num_characters(&self) -> u32 {
 
        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
 
        if *self <= TokenKind::Equal {
 
            1
 
        } else if *self <= TokenKind::GreaterEquals {
 
            2
 
        } else {
 
            3
 
        }
 
    }
 

	
 
    /// Returns the characters that are represented by the token. May only be called on tokens that
 
    /// do not have a variable length.
 
    pub fn token_chars(&self) -> &'static str {
 
        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
 
        use TokenKind as TK;
 
        match self {
 
            TK::Exclamation => "!",
 
            TK::Question => "?",
 
            TK::Pound => "#",
 
            TK::OpenAngle => "<",
 
            TK::OpenCurly => "{",
 
            TK::OpenParen => "(",
 
            TK::OpenSquare => "[",
 
            TK::CloseAngle => ">",
 
            TK::CloseCurly => "}",
 
            TK::CloseParen => ")",
 
            TK::CloseSquare => "]",
 
            TK::Colon => ":",
 
            TK::Comma => ",",
 
            TK::Dot => ".",
 
            TK::SemiColon => ";",
 
            TK::At => "@",
 
            TK::Plus => "+",
 
            TK::Minus => "-",
 
            TK::Star => "*",
 
            TK::Slash => "/",
 
            TK::Percent => "%",
 
            TK::Caret => "^",
 
            TK::And => "&",
 
            TK::Or => "|",
 
            TK::Tilde => "~",
 
            TK::Equal => "=",
 
            TK::ColonColon => "::",
 
            TK::DotDot => "..",
 
            TK::ArrowRight => "->",
 
            TK::AtEquals => "@=",
 
            TK::PlusPlus => "++",
 
            TK::PlusEquals => "+=",
 
            TK::MinusMinus => "--",
 
            TK::MinusEquals => "-=",
 
            TK::StarEquals => "*=",
 
            TK::SlashEquals => "/=",
 
            TK::PercentEquals => "%=",
 
            TK::CaretEquals => "^=",
 
            TK::AndAnd => "&&",
 
            TK::AndEquals => "&=",
 
            TK::OrOr => "||",
 
            TK::OrEquals => "|=",
 
            TK::EqualEqual => "==",
 
            TK::NotEqual => "!=",
 
            TK::ShiftLeft => "<<",
 
            TK::LessEquals => "<=",
 
            TK::ShiftRight => ">>",
 
            TK::GreaterEquals => ">=",
 
            TK::ShiftLeftEquals => "<<=",
 
            TK::ShiftRightEquals => ">>=",
 
            // Let's keep these in explicitly for now, in case we want to add more symbols
 
            TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
 
            TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
 
        }
 
    }
 
}
 

	
 
/// Represents a single token at a particular position.
 
pub struct Token {
 
    pub kind: TokenKind,
 
    pub pos: InputPosition,
 
}
 

	
 
impl Token {
 
    pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self {
 
        Self{ kind, pos }
 
    }
 
}
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub enum TokenMarkerKind {
 
    Pragma,
 
    Import,
 
    Definition,
 
}
 

	
 
/// A marker for a specific token. These are stored separately from the array of
 
/// tokens. These are used for initial symbol, module name, and import
 
/// discovery.
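///
/// For example (illustrative): for the source `import std.global;` the
/// tokenizer emits a marker with `kind: TokenMarkerKind::Import` whose
/// `first_token` is the index of the `import` identifier token, while
/// `last_token` starts out as `u32::MAX` until a later pass fills it in.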
 
#[derive(Debug)]
 
pub struct TokenMarker {
 
    pub kind: TokenMarkerKind,
 
    pub curly_depth: u32,
 
    // Indices into token buffer. The first token is inclusive and set upon
 
    // tokenization, the last token is set at a later stage in parsing (e.g.
 
    // at symbol discovery we may parse some of the `Pragma` tokens and set the
 
    // last parsed token)
 
    pub first_token: u32,
 
    pub last_token: u32,
 
    pub handled: bool,
 
}
 

	
 
/// The kind of token ranges that are specially parsed by the tokenizer.
 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 
pub enum TokenRangeKind {
 
    Module,
 
    Pragma,
 
    Import,
 
    Definition,
 
    Code,
 
}
 

	
 
pub const NO_RELATION: i32 = -1;
 
pub const NO_SIBLING: i32 = NO_RELATION;
 

	
 
/// A range of tokens with a specific meaning. Such a range is part of a tree
 
/// where each parent tree envelops all of its children.
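///
/// An illustrative layout for a module containing one import and one
/// definition (indices into `TokenBuffer.ranges`, -1 meaning NO_RELATION):
///
/// ```text
/// [0] Module      parent: -1, first_child: 1, last_child: 2
/// [1] Import      parent:  0, next_sibling: 2
/// [2] Definition  parent:  0, next_sibling: -1
/// ```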
 
#[derive(Debug)]
 
pub struct TokenRange {
 
    // Index of the parent in `TokenBuffer.ranges`. Only a range of kind

    // Module has no parent, in which case the parent index is -1.
 
    pub parent_idx: i32,
 
    pub range_kind: TokenRangeKind,
 
    pub curly_depth: u32,
 
    // Offsets into `TokenBuffer.tokens`: the tokens belonging to this range.
 
    pub start: u32,             // first token (inclusive index)
 
    pub end: u32,               // last token (exclusive index)
 
    // Child ranges
 
    pub num_child_ranges: u32,  // Number of subranges
 
    pub first_child_idx: i32,   // First subrange (or -1 if no subranges)
 
    pub last_child_idx: i32,    // Last subrange (or -1 if no subranges)
 
    pub next_sibling_idx: i32,  // Next subrange (or -1 if no next subrange)
 
}
 

	
 
pub struct TokenBuffer {
 
    pub tokens: Vec<Token>,
 
    pub markers: Vec<TokenMarker>,
 
    pub ranges: Vec<TokenRange>,
 
}
 

	
 
impl TokenBuffer {
 
    pub(crate) fn new() -> Self {
 
        return Self{
 
            tokens: Vec::new(),
 
            markers: Vec::new(),
 
            ranges: Vec::new()
 
        };
 
    }
 

	
 
    pub(crate) fn iter_range(
 
        &self, inclusive_start: u32, exclusive_end: Option<u32>
 
    ) -> TokenIter {
 
        let exclusive_end = exclusive_end.unwrap_or(self.tokens.len() as u32) as usize;
 
        debug_assert!(exclusive_end <= self.tokens.len());
 
        TokenIter::new(self, inclusive_start as usize, exclusive_end)
 
    }
 
}
 

	
 
/// Iterator over tokens within a specific `TokenRange`.
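///
/// A usage sketch (illustrative) of walking the meaningful tokens in a range:
///
/// ```ignore
/// let mut iter = buffer.iter_range(range.start, Some(range.end));
/// while let Some(kind) = iter.next() {
///     let span = iter.next_span(); // safe: `next` returned Some
///     // ... handle token `kind` at `span` ...
///     iter.consume();
/// }
/// ```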
 
pub(crate) struct TokenIter<'a> {
 
    tokens: &'a Vec<Token>,
 
    cur: usize,
 
    end: usize,
 
}
 

	
 
impl<'a> TokenIter<'a> {
 
    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
 
        Self{ tokens: &buffer.tokens, cur: start, end }
 
    }
 

	
 
    /// Returns the next token (may include comments), or `None` if at the end
 
    /// of the range.
 
    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
 
        if self.cur >= self.end {
 
            return None;
 
        }
 

	
 
        let token = &self.tokens[self.cur];
 
        Some(token.kind)
 
    }
 

	
 
    /// Returns the next token (but skips over comments), or `None` if at the
 
    /// end of the range
 
    pub(crate) fn next(&mut self) -> Option<TokenKind> {
 
        while let Some(token_kind) = self.next_including_comments() {
 
            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
 
                return Some(token_kind);
 
            }
 
            self.consume();
 
        }
 

	
 
        return None
 
    }
 

	
 
    /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
 
    /// skips over comments
 
    pub(crate) fn peek(&self) -> Option<TokenKind> {
 
        for next_idx in self.cur + 1..self.end {
 
            let next_kind = self.tokens[next_idx].kind;
 
            if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
 
                return Some(next_kind);
 
            }
 
        }
 

	
 
        return None;
 
    }
 

	
 
    /// Returns the start position belonging to the token returned by `next`. If
 
    /// there is not a next token, then we return the end position of the
 
    /// previous token.
 
    pub(crate) fn last_valid_pos(&self) -> InputPosition {
 
        if self.cur < self.end {
 
            // Return token position
 
            return self.tokens[self.cur].pos
 
        }
 

	
 
        // Return previous token end
 
        let token = &self.tokens[self.cur - 1];
 
        return if token.kind == TokenKind::SpanEnd {
 
            token.pos
 
        } else {
 
            token.pos.with_offset(token.kind.num_characters())
 
        };
 
    }
 

	
 
    /// Assumes the iterator is not at the end and returns the starting position
 
    /// belonging to the token returned by `next`.
 
    pub(crate) fn next_start_position(&self) -> InputPosition {
 
        debug_assert!(self.cur < self.end);
 
        return self.tokens[self.cur].pos;
 
    }
 

	
 
    /// Returns the begin and end positions of the token returned by `next`. This
 
    /// assumes that we're not at the end of the range we're iterating over.
 
    pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
 
        debug_assert!(self.cur < self.end);
 
        let token = &self.tokens[self.cur];
 
        if token.kind.has_span_end() {
 
            let span_end = &self.tokens[self.cur + 1];
 
            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
 
            (token.pos, span_end.pos)
 
        } else {
 
            let offset = token.kind.num_characters();
 
            (token.pos, token.pos.with_offset(offset))
 
        }
 
    }
 

	
 
    /// See `next_positions`
 
    pub(crate) fn next_span(&self) -> InputSpan {
 
        let (begin, end) = self.next_positions();
 
        return InputSpan::from_positions(begin, end)
 
    }
 

	
 
    /// Advances the iterator to the next (meaningful) token.
 
    pub(crate) fn consume(&mut self) {
 
        if let Some(kind) = self.next_including_comments() {
 
            if kind.has_span_end() {
 
                self.cur += 2;
 
            } else {
 
                self.cur += 1;
 
            }
 
        }
 
    }
 

	
 
    pub(crate) fn token_index(&self) -> u32 {
 
        return self.cur as u32;
 
    }
 

	
 
    /// Saves the current iteration position, may be passed to `load` to return
 
    /// the iterator to a previous position.
 
    pub(crate) fn save(&self) -> (usize, usize) {
 
        (self.cur, self.end)
 
    }
 

	
 
    pub(crate) fn load(&mut self, saved: (usize, usize)) {
 
        self.cur = saved.0;
 
        self.end = saved.1;
 
    }
 
}
 
\ No newline at end of file
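For context, a minimal sketch of how a parser pass might drive this iterator. Only `TokenBuffer`, `TokenIter` and `TokenKind` come from this file; the function itself is hypothetical:

// Hypothetical usage sketch, not part of this changeset: walk all
// meaningful tokens of a tokenized module.
fn dump_meaningful_tokens(buffer: &TokenBuffer) {
    // Iterate from the first token to the end of the buffer
    let mut iter = buffer.iter_range(0, None);
    while let Some(kind) = iter.next() { // skips line/block comments
        // `next` does not advance, so position queries still refer to
        // the token we just matched on
        println!("{:?} at line {}", kind, iter.next_start_position().line);
        iter.consume(); // step over the token (and its SpanEnd, if any)
    }
}

A speculative parse would bracket such a loop with `iter.save()` and `iter.load(..)` to backtrack after a failed attempt.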
src/protocol/token_writer.rs
#![allow(dead_code)]

-use std::fmt::Write;
+use std::fmt::{Write, Error as FmtError};
use std::io::Write as IOWrite;

use crate::protocol::input_source::{InputSource, InputSpan};
use crate::protocol::parser::Module;
-use crate::protocol::tokens::{Token, TokenKind, TokenRange};
+use crate::protocol::tokens::{Token, TokenKind, TokenMarker};

pub(crate) struct TokenWriter {
    buffer: String,
}

impl TokenWriter {
    pub(crate) fn new() -> Self {
        return Self{
            buffer: String::with_capacity(4096),
        }
    }

    pub(crate) fn write<W: IOWrite>(&mut self, w: &mut W, modules: &[Module]) {
        self.buffer.clear();
        for module in modules {
            self.write_module_tokens(module);
        }

        w.write_all(self.buffer.as_bytes()).expect("write tokens");
    }

    fn write_module_tokens(&mut self, module: &Module) {
        self.write_dashed_indent(0);

        match &module.name {
            Some(name) => writeln!(self.buffer, "Module: {}", name.1.as_str()).unwrap(),
            None => self.buffer.push_str("Unnamed module\n"),
        }

-        let mut range_index = -1;
-        if !module.tokens.ranges.is_empty() {
-            range_index = 0;
-        }
-
-        while range_index >= 0 {
-            range_index = self.write_token_range(
-                &module.source, &module.tokens.tokens, &module.tokens.ranges, range_index, 1
-            );
-        }
+        self.write_marker_array(&module.tokens.markers, 1).expect("write markers");
+        self.write_token_array(&module.source, &module.tokens.tokens, 1).expect("write tokens");
    }

-    /// Writes a single token range. Recurses if there are any child ranges.
-    /// Returns the next token range index to iterate over (or a negative
-    /// number, if there are no more sibling ranges).
-    fn write_token_range(&mut self, source: &InputSource, tokens: &[Token], ranges: &[TokenRange], range_index: i32, indent: u32) -> i32 {
-        // Write range kind
-        let range = &ranges[range_index as usize];
-        self.write_dashed_indent(indent);
-        writeln!(self.buffer, "Range: {:?}", range.range_kind);
-
-        // Write tokens/lines it spans
-        let first_token_pos = tokens[range.start as usize].pos;
-        let last_token_pos = if (range.end as usize) < tokens.len() {
-            tokens[range.end as usize].pos
-        } else {
-            tokens.last().unwrap().pos
-        };
-        let first_source_col = source.get_column(first_token_pos);
-        let last_source_col = source.get_column(last_token_pos);
-
-        self.write_indent(indent);
-        writeln!(
-            self.buffer, "Source: token {} to {}, file {}:{}:{} to {}:{}",
-            range.start, range.end, source.filename,
-            first_token_pos.line, first_source_col,
-            last_token_pos.line, last_source_col
-        );
-
-        let next_sibling_index = range.next_sibling_idx;
-        if range.num_child_ranges == 0 {
-            // No child ranges, so dump the tokens here
-            debug_assert!(range.first_child_idx < 0);
-            self.write_token_array(source, tokens, range, indent);
-        } else {
-            // Child ranges
-            debug_assert!(range.first_child_idx >= 0);
-            self.write_indent(indent);
-            writeln!(self.buffer, "Children: [");
-
-            let mut range_index = range.first_child_idx;
-            while range_index >= 0 {
-                range_index = self.write_token_range(source, tokens, ranges, range_index, indent + 1);
-            }
-
-            self.write_indent(indent);
-            writeln!(self.buffer, "]");
-        }
-
-        // Wrote everything, return the next sibling token range
-        return next_sibling_index;
-    }
+    fn write_marker_array(&mut self, markers: &[TokenMarker], indent: u32) -> Result<(), FmtError> {
+        self.write_indent(indent);
+        writeln!(self.buffer, "Markers: [")?;
+
+        let marker_indent = indent + 1;
+        for marker in markers {
+            self.write_indent(marker_indent);
+            writeln!(self.buffer, "{:?}", marker)?;
+        }
+
+        self.write_indent(indent);
+        writeln!(self.buffer, "]")?;
+
+        return Ok(());
+    }

-    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], range: &TokenRange, indent: u32) {
+    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], indent: u32) -> Result<(), FmtError> {
        self.write_indent(indent);
-        writeln!(self.buffer, "Tokens: [");
+        writeln!(self.buffer, "Tokens: [")?;

+        let num_tokens = tokens.len();
        let token_indent = indent + 1;
-        for token_index in range.start as usize..range.end as usize {
+        for token_index in 0..num_tokens {
            // Skip uninteresting tokens
            let token = &tokens[token_index];
            if token.kind == TokenKind::SpanEnd {
                continue;
            }

            self.write_indent(token_indent);
-            write!(self.buffer, "{:?} (index {})", token.kind, token_index);
+            write!(self.buffer, "{:?} (index {})", token.kind, token_index)?;
            if token.kind.has_span_end() {
                let token_start = token.pos;
                let token_end = tokens[token_index + 1].pos;
                let section = source.section_at_span(InputSpan::from_positions(token_start, token_end));
-                writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section));
+                writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section))?;
            } else {
                self.buffer.push('\n');
            }
        }

        self.write_indent(indent);
-        writeln!(self.buffer, "]");
+        writeln!(self.buffer, "]")?;
+
+        return Ok(());
    }

    fn write_dashed_indent(&mut self, indent: u32) {
        for _ in 0..indent * 2 {
            self.buffer.push(' ');
        }
        self.buffer.push('-');
        self.buffer.push(' ');
    }

    fn write_indent(&mut self, indent: u32) {
        for _ in 0..(indent + 1)*2 {
            self.buffer.push(' ');
        }
    }
}
\ No newline at end of file
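As a rough illustration of the writer's entry point after this change; `TokenWriter`, `Module` and the `write` signature are from this changeset, the surrounding driver is assumed:

// Hypothetical driver, not part of this changeset: dump the markers and
// tokens of every tokenized module to stdout.
fn debug_print_tokens(modules: &[Module]) {
    let mut writer = TokenWriter::new();
    let mut stdout = std::io::stdout();
    writer.write(&mut stdout, modules);
}

With the ranges gone, each module now prints as a flat `Markers: [...]` block followed by a single `Tokens: [...]` block, instead of the old recursive `Range:`/`Children:` tree.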