Changeset - 7f25ee16c39b
MH - 2021-04-14 19:30:38
contact@maxhenger.nl
WIP on compiler rearchitecting
7 files changed with 964 insertions and 158 deletions:
src/collections/string_pool.rs
 
use std::ptr::null_mut;
 
use std::collections::hash_map::DefaultHasher;
 
use std::hash::{Hash, Hasher};
 

	
 
const SLAB_SIZE: usize = u16::max_value() as usize;
 

	
 
#[derive(Clone)]
 
pub struct StringRef {
 
    data: *const u8,
 
    length: usize,
 
@@ -16,6 +19,22 @@ impl StringRef {
 
    }
 
}
 

	
 
impl PartialEq for StringRef {
 
    fn eq(&self, other: &StringRef) -> bool {
 
        self.as_str() == other.as_str()
 
    }
 
}
 

	
 
impl Eq for StringRef {}
 

	
 
impl Hash for StringRef {
 
    fn hash<H: Hasher>(&self, state: &mut H) {
 
        unsafe{
 
            state.write(std::slice::from_raw_parts(self.data, self.length));
 
        }
 
    }
 
}
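
// Note: hashing the raw byte slice keeps this impl consistent with the
// `PartialEq` impl above (equal strings hash equally), which is what allows
// `StringRef` to be used as a `HashMap` key in the symbol table.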
 

	
 
struct StringPoolSlab {
 
    prev: *mut StringPoolSlab,
 
    data: Vec<u8>,
 
@@ -60,7 +79,7 @@ impl StringPool {
 
            last = unsafe{&mut *self.last};
 
        }
 

	
 
        // Must fit now
 
        // Must fit now, compute hash and put in buffer
 
        debug_assert!(data_len <= last.remaining);
 
        let range_start = last.data.len();
 
        last.data.extend_from_slice(data);
src/protocol/ast.rs
 
@@ -6,7 +6,9 @@ use std::fmt::{Debug, Display, Formatter};
 
use std::ops::{Index, IndexMut};
 

	
 
use super::arena::{Arena, Id};
 
use crate::collections::StringRef;
 
use crate::protocol::inputsource::*;
 
use crate::protocol::input_source2::{InputPosition2, InputSpan};
 

	
 
/// Global limits to the AST, should be checked by lexer and parser. Some are
 
/// arbitrary
 
@@ -238,7 +240,7 @@ impl Index<ChannelStatementId> for Heap {
 
pub struct Root {
 
    pub this: RootId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    // pub position: InputPosition,
 
    pub pragmas: Vec<PragmaId>,
 
    pub imports: Vec<ImportId>,
 
    pub definitions: Vec<DefinitionId>,
 
@@ -264,14 +266,23 @@ impl SyntaxElement for Root {
 
#[derive(Debug, Clone)]
 
pub enum Pragma {
 
    Version(PragmaVersion),
 
    Module(PragmaModule)
 
    Module(PragmaModule),
 
}
 

	
 
impl Pragma {
 
    pub(crate) fn as_module(&self) -> &PragmaModule {
 
        match self {
 
            Pragma::Module(pragma) => pragma,
 
            _ => unreachable!("Tried to obtain {:?} as PragmaModule", self),
 
        }
 
    }
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub struct PragmaVersion {
 
    pub this: PragmaId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub span: InputSpan, // of full pragma
 
    pub version: u64,
 
}
 

	
 
@@ -279,22 +290,8 @@ pub struct PragmaVersion {
 
pub struct PragmaModule {
 
    pub this: PragmaId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub value: Vec<u8>,
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub struct PragmaOld {
 
    pub this: PragmaId,
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub value: Vec<u8>,
 
}
 

	
 
impl SyntaxElement for PragmaOld {
 
    fn position(&self) -> InputPosition {
 
        self.position
 
    }
 
    pub span: InputSpan, // of full pragma
 
    pub value: Identifier,
 
}
 

	
 
#[derive(Debug, Clone)]
 
@@ -365,8 +362,8 @@ pub struct ImportSymbols {
 

	
 
#[derive(Debug, Clone)]
 
pub struct Identifier {
 
    pub position: InputPosition,
 
    pub value: Vec<u8>
 
    pub span: InputSpan,
 
    pub value: StringRef,
 
}
 

	
 
impl PartialEq for Identifier {
src/protocol/input_source2.rs
 
use std::fmt;
 
use std::cell::{Ref, RefCell};
 
use std::fmt::Write;
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub struct InputPosition2 {
 
@@ -7,6 +8,13 @@ pub struct InputPosition2 {
 
    pub offset: u32,
 
}
 

	
 
impl InputPosition2 {
 
    pub(crate) fn with_offset(&self, offset: u32) -> Self {
 
        InputPosition2{ line: self.line, offset: self.offset + offset }
 
    }
 
}
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub struct InputSpan {
 
    pub begin: InputPosition2,
 
    pub end: InputPosition2,
 
@@ -14,7 +22,7 @@ pub struct InputSpan {
 

	
 
impl InputSpan {
 
    #[inline]
 
    fn from_positions(begin: InputPosition2, end: InputPosition2) -> Self {
 
    pub fn from_positions(begin: InputPosition2, end: InputPosition2) -> Self {
 
        Self { begin, end }
 
    }
 
}
 
@@ -75,8 +83,8 @@ impl InputSource2 {
 
        }
 
    }
 

	
 
    pub fn section(&self, start: u32, end: u32) -> &[u8] {
 
        &self.input[start as usize..end as usize]
 
    pub fn section(&self, start: InputPosition2, end: InputPosition2) -> &[u8] {
 
        &self.input[start.offset as usize..end.offset as usize]
 
    }
 

	
 
    // Consumes the next character. Will check well-formedness of newlines: \r
 
@@ -145,11 +153,14 @@ impl InputSource2 {
 
        return lookup;
 
    }
 

	
 
    /// Retrieves offset at which line starts (right after newline)
 
    fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        lookup[line_number as usize]
 
    }
 

	
 
    /// Retrieves offset at which line ends (at the newline character or the
 
    /// preceding carriage feed for \r\n-encoded newlines)
 
    fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        let offset = lookup[(line_number + 1) as usize] - 1;
 
@@ -169,79 +180,186 @@ impl InputSource2 {
 
}
 

	
 
#[derive(Debug)]
 
pub enum ParseErrorType {
 
pub enum StatementKind {
 
    Info,
 
    Error
 
}
 

	
 
#[derive(Debug)]
 
pub enum ContextKind {
 
    SingleLine,
 
    MultiLine,
 
}
 

	
 
#[derive(Debug)]
 
pub struct ParseErrorStatement {
 
    pub(crate) error_type: ParseErrorType,
 
    pub(crate) line: u32,
 
    pub(crate) column: u32,
 
    pub(crate) offset: u32,
 
    pub(crate) statement_kind: StatementKind,
 
    pub(crate) context_kind: ContextKind,
 
    pub(crate) start_line: u32,
 
    pub(crate) start_column: u32,
 
    pub(crate) end_line: u32,
 
    pub(crate) end_column: u32,
 
    pub(crate) filename: String,
 
    pub(crate) context: String,
 
    pub(crate) message: String,
 
}
 

	
 
impl ParseErrorStatement {
 
    fn from_source(error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
    fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self {
 
        // Seek line start and end
 
        let line_start = source.lookup_line_start_offset(position.line);
 
        let line_end = source.lookup_line_end_offset(position.line);
 
        let context = Self::create_context(source, line_start as usize, line_end as usize);
 
        debug_assert!(position.offset >= line_start);
 
        let column = position.offset - line_start + 1;
 

	
 
        Self{
 
            error_type,
 
            line: position.line,
 
            column,
 
            offset: position.offset,
 
            statement_kind,
 
            context_kind: ContextKind::SingleLine,
 
            start_line: position.line,
 
            start_column: column,
 
            end_line: position.line,
 
            end_column: column + 1,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    fn from_source_at_span(statement_kind: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self {
 
        debug_assert!(span.end.line >= span.begin.line);
 
        debug_assert!(span.end.offset >= span.begin.offset);
 

	
 
        let first_line_start = source.lookup_line_start_offset(span.begin.line);
 
        let last_line_start = source.lookup_line_start_offset(span.end.line);
 
        let last_line_end = source.lookup_line_end_offset(span.end.line);
 
        let context = Self::create_context(source, first_line_start as usize, last_line_end as usize);
 
        debug_assert!(span.begin.offset >= first_line_start);
 
        let start_column = span.begin.offset - first_line_start + 1;
 
        let end_column = span.end.offset - last_line_start + 1;
 

	
 
        let context_kind = if span.begin.line == span.end.line {
 
            ContextKind::SingleLine
 
        } else {
 
            ContextKind::MultiLine
 
        };
 

	
 
        Self{
 
            statement_kind,
 
            context_kind,
 
            start_line: span.begin.line,
 
            start_column,
 
            end_line: span.end.line,
 
            end_column,
 
            filename: source.filename.clone(),
 
            context: String::from_utf8_lossy(&source.input[line_start as usize..line_end as usize]).to_string(),
 
            message: msg.to_string()
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    /// Produces context from source
 
    fn create_context(source: &InputSource2, start: usize, end: usize) -> String {
 
        let context_raw = &source.input[start..end];
 
        String::from_utf8_lossy(context_raw).to_string()
 
    }
 
}
 

	
 
impl fmt::Display for ParseErrorStatement {
 
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
        // Write message
 
        match self.error_type {
 
            ParseErrorType::Info => write!(f, " INFO: ")?,
 
            ParseErrorType::Error => write!(f, "ERROR: ")?,
 
        // Write kind of statement and message
 
        match self.statement_kind {
 
            StatementKind::Info => f.write_str(" INFO: ")?,
 
            StatementKind::Error => f.write_str("ERROR: ")?,
 
        }
 
        writeln!(f, "{}", &self.message)?;
 
        f.write_str(&self.message)?;
 
        f.write_char('\n')?;
 

	
 
        // Write originating file/line/column
 
        if self.filename.is_empty() {
 
            writeln!(f, " +- at {}:{}", self.line, self.column)?;
 
        } else {
 
            writeln!(f, " +- at {}:{}:{}", self.filename, self.line, self.column)?;
 
        f.write_str(" +- ")?;
 
        if !self.filename.is_empty() {
 
            write!(f, "in {} ", self.filename)?;
 
        }
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => writeln!(f, " at {}:{}", self.start_line, self.start_column),
 
            ContextKind::MultiLine => writeln!(
 
                f, " from {}:{} to {}:{}",
 
                self.start_line, self.start_column, self.end_line, self.end_column
 
            )
 
        }?;
 

	
 
        // Helper function for writing context: converting tabs into 4 spaces
 
        // (oh, the controversy!) and creating an annotated line
 
        fn transform_context(source: &str, target: &mut String) {
 
            for char in source.chars() {
 
                if char == '\t' {
 
                    target.push_str("    ");
 
                } else {
 
                    target.push(char);
 
                }
 
            }
 
        }
 

	
 
        fn extend_annotation(first_col: u32, last_col: u32, source: &str, target: &mut String, extend_char: char) {
 
            debug_assert!(first_col > 0 && last_col >= first_col);
 
            for (char_idx, char) in source.chars().enumerate().skip(first_col as usize - 1) {
 
                if char_idx == last_col as usize {
 
                    break;
 
                }
 

	
 
                if char == '\t' {
 
                    for _ in 0..4 { target.push(extend_char); }
 
                } else {
 
                    target.push(extend_char);
 
                }
 
            }
 
        }
 

	
 
        // Write source context
 
        writeln!(f, " | ")?;
 
        writeln!(f, " | {}", self.context)?;
 

	
 
        // Write underline indicating where the error occurred
 
        debug_assert!(self.column as usize <= self.context.chars().count());
 
        let mut arrow = String::with_capacity(self.context.len() + 3);
 
        arrow.push_str(" | ");
 
        let mut char_col = 1;
 
        for char in self.context.chars() {
 
            if char_col == self.column { break; }
 
            if char == '\t' {
 
                arrow.push('\t');
 
            } else {
 
                arrow.push(' ');
 
            }
 

	
 
            char_col += 1;
 
        let mut context = String::with_capacity(128);
 
        let mut annotation = String::with_capacity(128);
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => {
 
                // Write single line of context with indicator for the offending
 
                // span underneath.
 
                transform_context(&self.context, &mut context);
 
                context.push('\n');
 
                f.write_str(&context)?;
 

	
 
                annotation.push_str(" | ");
 
                extend_annotation(1, self.start_column, &self.context, &mut annotation, ' ');

                extend_annotation(self.start_column, self.end_column, &self.context, &mut annotation, '~');
 
                annotation.push('\n');
 

	
 
                f.write_str(&annotation)?;
 
            },
 
            ContextKind::MultiLine => {
 
                // Annotate all offending lines
 
                // - first line
 
                let mut lines = self.context.lines();
 
                let first_line = lines.next().unwrap();
 
                transform_context(first_line, &mut context);
 
                writeln!(f, " |- {}", &context)?;
 

	
 
                // - remaining lines
 
                let mut last_line = first_line;
 
                while let Some(cur_line) = lines.next() {
 
                    context.clear();
 
                    transform_context(cur_line, &mut context);
 
                    writeln!(f, " |  {}", &context)?;
 
                    last_line = cur_line;
 
                }
 

	
 
                // - underline beneath last line
 
                annotation.push_str(" \\__");
 
                extend_annotation(1, self.end_column, &last_line, &mut annotation, '_');
 
                annotation.push_str("/\n");
 
                f.write_str(&annotation)?;
 
            }
 
        }
 
        arrow.push('^');
 
        writeln!(f, "{}", arrow)?;
 

	
 
        Ok(())
 
    }
 
@@ -273,21 +391,53 @@ impl ParseError {
 
        Self{ statements: Vec::new() }
 
    }
 

	
 
    pub fn new_error(source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source(ParseErrorType::Error, source, position, msg))}
 
    pub fn new_error_at_pos(source: &InputSource2, position: InputPosition2, message: String) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
 
            StatementKind::Error, source, position, message
 
        )) }
 
    }
 

	
 
    pub fn new_error_str_at_pos(source: &InputSource2, position: InputPosition2, message: &str) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
 
            StatementKind::Error, source, position, message.to_string()
 
        )) }
 
    }
 

	
 
    pub fn new_error_at_span(source: &InputSource2, span: InputSpan, message: String) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
 
            StatementKind::Error, source, span, message
 
        )) }
 
    }
 

	
 
    pub fn new_error_str_at_span(source: &InputSource2, span: InputSpan, message: &str) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
 
            StatementKind::Error, source, span, message.to_string()
 
        )) }
 
    }
 

	
 
    pub fn with_prefixed(mut self, error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
        self.statements.insert(0, ParseErrorStatement::from_source(error_type, source, position, msg));
 
    pub fn with_at_pos(mut self, error_type: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self {
 
        self.statements.push(ParseErrorStatement::from_source_at_pos(error_type, source, position, message));
 
        self
 
    }
 

	
 
    pub fn with_postfixed(mut self, error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
        self.statements.push(ParseErrorStatement::from_source(error_type, source, position, msg));
 
    pub fn with_at_span(mut self, error_type: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self {
 
        self.statements.push(ParseErrorStatement::from_source_at_span(error_type, source, span, message));
 
        self
 
    }
 

	
 
    pub fn with_postfixed_info(self, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
        self.with_postfixed(ParseErrorType::Info, source, position, msg)
 
    pub fn with_info_at_pos(self, source: &InputSource2, position: InputPosition2, msg: String) -> Self {
 
        self.with_at_pos(StatementKind::Info, source, position, msg)
 
    }
 

	
 
    pub fn with_info_str_at_pos(self, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
 
        self.with_at_pos(StatementKind::Info, source, position, msg.to_string())
 
    }
 

	
 
    pub fn with_info_at_span(self, source: &InputSource2, span: InputSpan, msg: String) -> Self {
 
        self.with_at_span(StatementKind::Info, source, span, msg)
 
    }
 

	
 
    pub fn with_info_str_at_span(self, source: &InputSource2, span: InputSpan, msg: &str) -> Self {
 
        self.with_at_span(StatementKind::Info, source, span, msg.to_string())
 
    }
 
}
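
// Usage sketch (hypothetical call site, not part of this changeset): the
// builder-style `with_*` methods append secondary statements to an error,
// which `Display` then renders one after the other:
//
//     return Err(ParseError::new_error_str_at_span(source, span, "conflict in module name")
//         .with_info_str_at_span(other_source, other_span, "other module is defined here"));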
src/protocol/lexer2.rs
 
use crate::protocol::ast::*;
 
use crate::protocol::Heap;
 
use crate::protocol::tokenizer::{TokenBuffer, Token};
 
use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError};
 
use crate::collections::{StringPool, StringRef};
 
use crate::protocol::tokenizer::*;
 
use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as InputPosition, InputSpan, ParseError};
 
use crate::protocol::symbol_table2::*;
 

	
 
#[derive(PartialEq, Eq)]
 
enum ModuleCompilationPhase {
 
    Source,                 // only source is set
 
    Tokenized,              // source is tokenized
 
    DefinitionsScanned,     // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    Parsed,                 // produced the AST for the module
 
    ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
 
    Typed,                  // Type inference and checking has been performed
 
}
 

	
 
enum KeywordDefinition {
 
    Struct,
 
    Enum,
 
    Union,
 
    Function,
 
    Primitive,
 
    Composite,
 
}
 

	
 
impl KeywordDefinition {
 
    fn as_symbol_class(&self) -> SymbolClass {
 
        use KeywordDefinition as KD;
 
        use SymbolClass as SC;
 

	
 
        match self {
 
            KD::Struct => SC::Struct,
 
            KD::Enum => SC::Enum,
 
            KD::Union => SC::Union,
 
            KD::Function => SC::Function,
 
            KD::Primitive | KD::Composite => SC::Component,
 
        }
 
    }
 
}
 

	
 
struct Module {
 
    // Buffers
 
    source: InputSource,
 
    tokens: TokenBuffer,
 
    // Identifiers
 
    root_id: RootId,
 
    name: Option<(PragmaId, StringRef)>,
 
    version: Option<(PragmaId, u64)>,
 
    phase: ModuleCompilationPhase,
 
}
 

	
 
struct Ctx<'a> {
 
    heap: &'a mut Heap,
 
    source: &'a InputSource,
 
    tokens: &'a TokenBuffer,
 
    symbols: &'a mut SymbolTable,
 
    pool: &'a mut StringPool,
 
}
 

	
 
/// Scans the module and finds all module-level type definitions. These will be
 
/// added to the symbol table such that during AST-construction we know which
 
/// identifiers point to types. Will also parse all pragmas to determine module
 
/// names.
 
pub(crate) struct ASTSymbolPrePass {
 
    symbols: Vec<Symbol>,
 
    pragmas: Vec<PragmaId>,
 
    buffer: String,
 
    has_pragma_version: bool,
 
    has_pragma_module: bool,
 
}
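
// Intended driving sequence (sketch; the surrounding compiler driver is an
// assumption, not part of this changeset): one reusable pre-pass instance is
// run over every tokenized module before the full AST pass.
//
//     let mut pre_pass = ASTSymbolPrePass::new();
//     for idx in 0..modules.len() {
//         pre_pass.parse(&mut modules, idx, &mut ctx)?;
//     }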
 

	
 
impl ASTSymbolPrePass {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            symbols: Vec::with_capacity(128),
 
            pragmas: Vec::with_capacity(8),
 
            buffer: String::with_capacity(128),
 
            has_pragma_version: false,
 
            has_pragma_module: false,
 
        }
 
    }
 

	
 
    fn reset(&mut self) {
 
        self.symbols.clear();
 
        self.pragmas.clear();
 
        self.has_pragma_version = false;
 
        self.has_pragma_module = false;
 
    }
 

	
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> {
 
        self.reset();
 

	
 
        let module = &mut modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        let expected_parent_idx = 0;
 
        let expected_subranges = module_range.subranges;
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 
        debug_assert_eq!(module.root_id.index, 0);
 

	
 
        // Preallocate root in the heap
 
        let root_id = ctx.heap.alloc_protocol_description(|this| {
 
            Root{
 
                this,
 
                pragmas: Vec::new(),
 
                imports: Vec::new(),
 
                definitions: Vec::new(),
 
            }
 
        });
 
        module.root_id = root_id;
 

	
 
        // Visit token ranges to detect definitions
 
        let mut visited_subranges = 0;
 
        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
 
            // Skip any ranges that do not belong to the module
 
            let cur_range = &module.tokens.ranges[range_idx];
 
            if cur_range.parent_idx != expected_parent_idx {
 
                continue;
 
            }
 

	
 
            // Parse if it is a definition or a pragma
 
            if cur_range.range_kind == TokenRangeKind::Definition {
 
                self.visit_definition_range(modules, module_idx, ctx, range_idx)?;
 
            } else if cur_range.range_kind == TokenRangeKind::Pragma {
 
                self.visit_pragma_range(modules, module_idx, ctx, range_idx)?;
 
            }
 

	
 
            visited_subranges += 1;
 
            if visited_subranges == expected_subranges {
 
                break;
 
            }
 
        }
 

	
 
        // By now all symbols should have been found: add to symbol table and
 
        // add the parsed pragmas to the preallocated root in the heap.
 
        debug_assert_eq!(visited_subranges, expected_subranges);
 
        ctx.symbols.insert_scoped_symbols(None, SymbolScope::Module(module.root_id), &self.symbols)?;
 

	
 
        let root = &mut ctx.heap[root_id];
 
        debug_assert!(root.pragmas.is_empty());
 
        root.pragmas.extend(&self.pragmas);
 

	
 
        module.phase = ModuleCompilationPhase::DefinitionsScanned;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &mut modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // Consume pragma name
 
        let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
 

	
 
        // Consume pragma values
 
        if pragma_section == b"#module" {
 
            // Check if name is defined twice within the same file
 
            if self.has_pragma_module {
 
                return Err(ParseError::new_error(&module.source, pragma_start, "module name is defined twice"));
 
            }
 

	
 
            // Consume the domain-name
 
            let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
 
            if iter.next().is_some() {
 
                return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
 
            }
 

	
 
            // Add to heap and symbol table
 
            let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
 
            let module_name = ctx.pool.intern(module_name);
 
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
 
                this,
 
                span: pragma_span,
 
                value: Identifier{ span: module_span, value: module_name.clone() },
 
            }));
 
            self.pragmas.push(pragma_id);
 

	
 
            if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name, module.root_id) {
 
                // Naming conflict
 
                let this_module = &modules[module_idx];
 
                let other_module = seek_module(modules, other_module_root_id).unwrap();
 
                let (other_module_pragma_id, _) = other_module.name.unwrap();
 
                let other_pragma = ctx.heap[other_module_pragma_id].as_module();
 
                return Err(ParseError::new_error_str_at_span(
 
                    &this_module.source, pragma_span, "conflict in module name"
 
                ).with_info_str_at_span(
 
                    &other_module.source, other_pragma.span, "other module is defined here"
 
                ));
 
            }
 
            self.has_pragma_module = true;
 
        } else if pragma_section == b"#version" {
 
            // Check if version is defined twice within the same file
 
            if self.has_pragma_version {
 
                return Err(ParseError::new_error(&module.source, pragma_start, "module version is defined twice"));
 
            }
 

	
 
            // Consume the version pragma
 
            let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
 
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
 
                this,
 
                span: InputSpan::from_positions(pragma_start, version_span.end),
 
                version,
 
            }));
 
            self.pragmas.push(pragma_id);
 
            self.has_pragma_version = true;
 
        } else {
 
            // Custom pragma, maybe we support this in the future, but for now
 
            // we don't.
 
            return Err(ParseError::new_error(&module.source, pragma_start, "illegal pragma name"));
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_definition_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let definition_span = InputSpan::from_positions(
 
            module.tokens.start_pos(range),
 
            module.tokens.end_pos(range)
 
        );
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // Because we're visiting a definition, we expect an ident that resolves
 
        // to a keyword indicating a definition.
 
        let kw_text = consume_ident_text(&module.source, &mut iter).unwrap();
 
        let kw = parse_definition_keyword(kw_text).unwrap();
 

	
 
        // Retrieve identifier and put in temp symbol table
 
        let definition_ident = consume_ident_text(&module.source, &mut iter)?;
 
        let definition_ident = ctx.pool.intern(definition_ident);
 
        let symbol_class = kw.as_symbol_class();
 

	
 
        // Get the token indicating the end of the definition to get the full
 
        // span of the definition
 
        let last_token = &module.tokens.tokens[(range.end - 1) as usize];
 
        debug_assert_eq!(last_token.kind, TokenKind::CloseCurly);
 

	
 
        self.symbols.push(Symbol::new(
 
            module.root_id,
 
            SymbolScope::Module(module.root_id),
 
            definition_span,
 
            symbol_class,
 
            definition_ident
 
        ));
 

	
 
        Ok(())
 
    }
 
}
 

	
 
pub(crate) struct ASTImportPrePass {
 
}
 

	
 
impl ASTImportPrePass {
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::DefinitionsScanned);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 

	
 
        let expected_parent_idx = 0;
 
        let expected_subranges = module_range.subranges;
 
        let mut visited_subranges = 0;
 

	
 
        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
 
            let cur_range = &module.tokens.ranges[range_idx];
 
            if cur_range.parent_idx != expected_parent_idx {
 
                continue;
 
            }
 

	
 
            visited_subranges += 1;
 
            if cur_range.range_kind == TokenRangeKind::Import {
 
                self.visit_import_range(modules, module_idx, ctx, range_idx)?;
 
            }
 

	
 
            if visited_subranges == expected_subranges {
 
                break;
 
            }
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    pub(crate) fn visit_import_range(
 
        &mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize
 
    ) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let import_range = &module.tokens.ranges[range_idx];
 
        debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
 

	
 
        let mut iter = module.tokens.iter_range(import_range);
 

	
 
        // Consume "import"
 
        let _import_ident = consume_ident_text(&module.source, &mut iter)?;
 
        debug_assert_eq!(_import_ident, KW_IMPORT);
 

	
 
        // Consume module name
 
        let (module_name, _) = consume_domain_ident(&module.source, &mut iter)?;
 

	
 

	
 
        Ok(())
 
    }
 
}
 

	
 
// Lexes definitions. Should be the first pass over each of the module files 
 
// after tokenization. Only once all definitions are parsed can we do the full
 
// AST creation pass.
 
struct LexerDefinitions {
 
fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (_, name_start, mut name_end) = consume_ident(source, iter)?;
 
    while let Some(TokenKind::Dot) = iter.next() {
 
        consume_dot(source, iter)?;
 
        let (_, _, new_end) = consume_ident(source, iter)?;
 
        name_end = new_end;
 
    }
 

	
 
    Ok((source.section(name_start, name_end), InputSpan::from_positions(name_start, name_end)))
 
}
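
// Example (sketch): for the input `std.random` this consumes the token
// sequence `Ident Dot Ident` and returns the raw bytes `std.random` together
// with a span from the first identifier's start to the last one's end.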
 

	
 
fn consume_dot<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(), ParseError> {
 
    if Some(TokenKind::Dot) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a dot"));
 
    }
 
    iter.consume();
 
    Ok(())
 
}
 

	
 
impl LexerDefinitions {
 
    pub(crate) fn parse(ctx: &mut Ctx) -> Result<(), ParseError> {
 
        debug_assert!(ctx.tokens.ranges.len() > 0);
 
fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
 
    if Some(TokenKind::Integer) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
 
    }
 
    let (start_pos, end_pos) = iter.next_range();
 
    iter.consume();
 

	
 
    pub(crate) fn parse_definition(heap: &mut Heap, source: &InputSource, range: &TokenRang)
 
    let integer_text = source.section(start_pos, end_pos);
 

	
 
    // Determine radix and offset from prefix
 
    let (radix, input_offset, radix_name) =
 
        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
 
            // Binary number
 
            (2, 2, "binary")
 
        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
 
            // Octal number
 
            (8, 2, "octal")
 
        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
 
            // Hexadecimal number
 
            (16, 2, "hexadecimal")
 
        } else {
 
            (10, 0, "decimal")
 
        };
 

	
 
    // Take out any of the separating '_' characters
 
    buffer.clear();
 
    for char_idx in input_offset..integer_text.len() {
 
        let char = integer_text[char_idx];
 
        if char == b'_' {
 
            continue;
 
        }
 
        // Letter "digits" must pass this check too: binary/octal/hex literals
        // use them, and `from_str_radix` below rejects anything that is
        // invalid for the chosen radix.
        if !char.is_ascii_alphanumeric() {

            return Err(ParseError::new_error_str_at_pos(source, start_pos, "incorrectly formatted integer"));
 
        }
 
        buffer.push(char::from(char));
 
    }
 

	
 
    // Use the cleaned up string to convert to integer
 
    match u64::from_str_radix(&buffer, radix) {
 
        Ok(number) => Ok((number, InputSpan::from_positions(start_pos, end_pos))),
 
        Err(_) => Err(
 
            ParseError::new_error_str_at_pos(source, start_pos, "incorrectly formatted integer")
 
        ),
 
    }
 
}
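
// Example (sketch): for the token text `0x1_F` the prefix selects radix 16
// with a two-byte offset, the `_` separator is dropped while filling the
// buffer, and `u64::from_str_radix("1F", 16)` yields 31.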
 

	
 
fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
 
    for module in modules {
 
        if module.root_id == root_id {
 
            return Some(module)
 
        }
 
    }
 

	
 
    return None
 
}
 

	
 
fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_range();
 
    iter.consume();
 
    Ok((source.section(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
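
// Example (sketch): for `#module std.random` the returned section is the
// bytes `#module` (leading `#` included), which is why the pre-pass compares
// it against `b"#module"` / `b"#version"` directly.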
 

	
 
fn consume_ident_text<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<&'a [u8], ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error(source, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_range();
 
    iter.consume();
 
    Ok(source.section(ident_start, ident_end))
 
}
 

	
 
fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_range();
 
    iter.consume();
 
    Ok((source.section(ident_start, ident_end), ident_start, ident_end))
 
}
 

	
 
fn parse_definition_keyword(keyword: &[u8]) -> Option<KeywordDefinition> {
 
    match keyword {
 
        KW_STRUCT =>    Some(KeywordDefinition::Struct),

        KW_ENUM =>      Some(KeywordDefinition::Enum),

        KW_UNION =>     Some(KeywordDefinition::Union),

        KW_FUNCTION =>  Some(KeywordDefinition::Function),

        KW_PRIMITIVE => Some(KeywordDefinition::Primitive),

        KW_COMPOSITE => Some(KeywordDefinition::Composite),
 
        _ => None
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/mod.rs
 
mod depth_visitor;
 
pub(crate) mod symbol_table;
 
pub(crate) mod symbol_table2;
 
pub(crate) mod type_table;
 
mod type_resolver;
 
mod visitor;
src/protocol/parser/symbol_table2.rs
 
new file 100644
 
use std::collections::HashMap;
 
use std::collections::hash_map::Entry;
 

	
 
use crate::protocol::input_source2::*;
 
use crate::protocol::ast::*;
 
use crate::collections::*;
 

	
 
#[derive(Clone, Copy, PartialEq, Eq)]
 
pub enum SymbolScope {
 
    Module(RootId),
 
    Definition(DefinitionId),
 
}
 

	
 
#[derive(Clone, Copy, PartialEq, Eq)]
 
pub enum SymbolClass {
 
    Module,
 
    Struct,
 
    Enum,
 
    Union,
 
    Function,
 
    Component
 
}
 

	
 
struct ScopedSymbols {
 
    scope: SymbolScope,
 
    parent_scope: Option<SymbolScope>,
 
    child_scopes: Vec<SymbolScope>,
 
    start: usize,
 
    end: usize,
 
}
 

	
 
#[derive(Clone)]
pub struct Symbol {
 
    // Definition location
 
    pub defined_in_module: RootId,
 
    pub defined_in_scope: SymbolScope,
 
    pub definition_span: InputSpan, // full span of definition
 
    // Introduction location (if imported instead of defined)
 

	
 
    // Symbol properties
 
    pub class: SymbolClass,
 
    pub name: StringRef,
 
    pub definition: Option<DefinitionId>,
 
}
 

	
 
impl Symbol {
 
    pub(crate) fn new(root_id: RootId, scope: SymbolScope, span: InputSpan, class: SymbolClass, name: StringRef) -> Self {
 
        Self{
 
            defined_in_module: root_id,
 
            defined_in_scope: scope,
 
            definition_span: span,
 
            class,
 
            name,
 
            definition: None,
 
        }
 
    }
 
}
 

	
 
pub struct SymbolTable {
 
    module_lookup: HashMap<StringRef, RootId>,
 
    scope_lookup: HashMap<SymbolScope, ScopedSymbols>,
 
    symbols: Vec<Symbol>,
 
}
 

	
 
impl SymbolTable {
 
    /// Inserts a new module by its name. Upon module naming conflict the
 
    /// previously associated `RootId` will be returned.
 
    pub(crate) fn insert_module(&mut self, module_name: StringRef, root_id: RootId) -> Result<(), RootId> {
 
        match self.module_lookup.entry(module_name) {
 
            Entry::Occupied(v) => {
 
                Err(*v.get())
 
            },
 
            Entry::Vacant(v) => {
 
                v.insert(root_id);
 
                Ok(())
 
            }
 
        }
 
    }
 

	
 
    /// Inserts a new scope with defined symbols. The `parent_scope` must
 
    /// already be added to the symbol table. The symbols are expected to come
 
    /// from a temporary buffer and are copied inside the symbol table. Will
 
    /// return an error if there is a naming conflict.
 
    pub(crate) fn insert_scoped_symbols(
 
        &mut self, parent_scope: Option<SymbolScope>, within_scope: SymbolScope, symbols: &[Symbol]
 
    ) -> Result<(), ParseError> {
 
        // Add scoped symbols
 
        let old_num_symbols = self.symbols.len();
 

	
 
        let new_scope = ScopedSymbols {
 
            scope: within_scope,
 
            parent_scope,
 
            child_scopes: Vec::new(),
 
            start: old_num_symbols,
 
            end: old_num_symbols + symbols.len(),
 
        };
 

	
 
        self.symbols.extend_from_slice(symbols);
 
        self.scope_lookup.insert(within_scope, new_scope);
 

	
 
        if let Some(parent_scope) = parent_scope.as_ref() {
 
            let parent = self.scope_lookup.get_mut(parent_scope).unwrap();
 
            parent.child_scopes.push(within_scope);
 
        }
 

	
 
        Ok(())
 
    }
 
}
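
// Usage sketch (hypothetical names): on a module naming conflict the `Err`
// value carries the `RootId` that already claimed the name, which the
// pre-pass uses to point its error at the other definition site.
//
//     let name = ctx.pool.intern(b"std.random");
//     if let Err(other_root_id) = ctx.symbols.insert_module(name, module.root_id) {
//         // report "conflict in module name" against `other_root_id`
//     }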
 
\ No newline at end of file
src/protocol/tokenizer/mod.rs
 

	
 
use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError, InputPosition2 as InputPosition, InputSpan};
 

	
 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 
use crate::protocol::input_source2::{
 
    InputSource2 as InputSource,
 
    ParseError,
 
    InputPosition2 as InputPosition,
 
    InputSpan
 
};
 

	
 
pub(crate) const KW_STRUCT:    &'static [u8] = b"struct";
 
pub(crate) const KW_ENUM:      &'static [u8] = b"enum";
 
pub(crate) const KW_UNION:     &'static [u8] = b"union";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"func";
 
pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
 
pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
 
pub(crate) const KW_IMPORT:    &'static [u8] = b"import";
 

	
 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 
pub(crate) enum TokenKind {
 
    // Variable-character tokens, followed by a SpanEnd token
 
    Ident,          // regular identifier
 
@@ -11,7 +23,7 @@ pub(crate) enum TokenKind {
 
    Character,      // character literal, range includes `'`
 
    LineComment,    // line comment, range includes leading `//`, but not newline
 
    BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
 
    // Punctuation
 
    // Punctuation (single character)
 
    Exclamation,    // !
 
    Question,       // ?
 
    Pound,          // #
 
@@ -24,48 +36,68 @@ pub(crate) enum TokenKind {
 
    CloseParen,     // )
 
    CloseSquare,    // ]
 
    Colon,          // :
 
    ColonColon,     // ::
 
    Comma,          // ,
 
    Dot,            // .
 
    DotDot,         // ..
 
    SemiColon,      // ;
 
    Quote,          // '
 
    DoubleQuote,    // "
 
    // Operator-like
 
    // Operator-like (single character)
 
    At,             // @
 
    Plus,           // +
 
    PlusPlus,       // ++
 
    PlusEquals,     // +=
 
    Minus,          // -
 
    Star,           // *
 
    Slash,          // /
 
    Percent,        // %
 
    Caret,          // ^
 
    And,            // &
 
    Or,             // |
 
    Tilde,          // ~
 
    Equal,          // =
 
    // Punctuation (two characters)
 
    ColonColon,     // ::
 
    DotDot,         // ..
 
    ArrowRight,     // ->
 
    // Operator-like (two characters)
 
    PlusPlus,       // ++
 
    PlusEquals,     // +=
 
    MinusMinus,     // --
 
    MinusEquals,    // -=
 
    Star,           // *
 
    StarEquals,     // *=
 
    Slash,          // /
 
    SlashEquals,    // /=
 
    Percent,        // %
 
    PercentEquals,  // %=
 
    Caret,          // ^
 
    CaretEquals,    // ^=
 
    And,            // &
 
    AndAnd,         // &&
 
    AndEquals,      // &=
 
    Or,             // |
 
    OrOr,           // ||
 
    OrEquals,       // |=
 
    Tilde,          // ~
 
    Equal,          // =
 
    EqualEqual,     // ==
 
    NotEqual,       // !=
 
    ShiftLeft,      // <<
 
    ShiftLeftEquals,// <<=
 
    ShiftRight,     // >>
 
    // Operator-like (three characters)
 
    ShiftLeftEquals,// <<=
 
    ShiftRightEquals, // >>=
 
    // Special marker token to indicate end of variable-character tokens
 
    SpanEnd,
 
}
 

	
 
impl TokenKind {
 
    fn has_span_end(&self) -> bool {
 
        return *self <= TokenKind::BlockComment
 
    }
 

	
 
    fn num_characters(&self) -> u32 {
 
        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
 
        if *self <= TokenKind::Equal {
 
            1
 
        } else if *self <= TokenKind::ShiftRight {
 
            2
 
        } else {
 
            3
 
        }
 
    }
 
}
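
// Encoding sketch: the variable-width kinds (`Ident` through `BlockComment`)
// are stored as a start token followed by a `SpanEnd` token carrying the end
// position, while the fixed-width kinds store only their start and recover
// their width through `num_characters()`. So `foo ->` tokenizes as
// `Ident, SpanEnd, ArrowRight`.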
 

	
 
pub(crate) struct Token {
 
    pub kind: TokenKind,
 
    pub pos: InputPosition,
 
@@ -86,16 +118,18 @@ pub(crate) enum TokenRangeKind {
 
    Code,
 
}
 

	
 
/// TODO: Add first_child and next_sibling indices for slightly faster traversal
 
#[derive(Debug)]
 
struct TokenRange {
 
pub(crate) struct TokenRange {
 
    // Index of parent in `TokenBuffer.ranges`, does not have a parent if the 
 
    // range kind is Module, in that case the parent index points to itself.
 
    parent_idx: usize,
 
    range_kind: TokenRangeKind,
 
    curly_depth: i32,
 
    start: usize,
 
    end: usize,
 
    subranges: usize,
 
    pub parent_idx: usize,
 
    pub range_kind: TokenRangeKind,
 
    pub curly_depth: u32,
 
    // InputPosition offset is limited to u32, so token ranges can be as well.
 
    pub start: u32,
 
    pub end: u32,
 
    pub subranges: u32,
 
}
 

	
 
pub(crate) struct TokenBuffer {
 
@@ -107,26 +141,126 @@ impl TokenBuffer {
 
    pub(crate) fn new() -> Self {
 
        Self{ tokens: Vec::new(), ranges: Vec::new() }
 
    }
 

	
 
    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
 
        TokenIter::new(self, range.start as usize, range.end as usize)
 
    }
 

	
 
    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
 
        self.tokens[range.start as usize].pos
 
    }
 

	
 
    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
 
        let last_token = &self.tokens[(range.end - 1) as usize];
 
        if last_token.kind == TokenKind::SpanEnd {
 
            return last_token.pos
 
        } else {
 
            debug_assert!(!last_token.kind.has_span_end());
 
            return last_token.pos.with_offset(last_token.kind.num_characters());
 
        }
 
    }
 
}
 

	
 
pub(crate) struct TokenIter<'a> {
 
    tokens: &'a Vec<Token>,
 
    cur: usize,
 
    end: usize,
 
}
 

	
 
// Tokenizer is a reusable parser to tokenize multiple source files using the
 
// same allocated buffers. In a well-formed program, we produce a consistent
 
// tree of token ranges such that we may identify tokens that represent a 
 
// definition or an import before producing the entire AST.
 
//
 
// If the program is not well-formed then the tree may be inconsistent, but we
 
// will detect this once we transform the tokens into the AST. Maybe we want to
 
// detect a mismatch in opening/closing curly braces in the future?
 
impl<'a> TokenIter<'a> {
 
    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
 
        Self{ tokens: &buffer.tokens, cur: start, end }
 
    }
 

	
 
    /// Returns the next token (may include comments), or `None` if at the end
 
    /// of the range.
 
    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
 
        if self.cur >= self.end {
 
            return None;
 
        }
 

	
 
        let token = &self.tokens[self.cur];
 
        Some(token.kind)
 
    }
 

	
 
    /// Returns the next token (but skips over comments), or `None` if at the
 
    /// end of the range
 
    pub(crate) fn next(&mut self) -> Option<TokenKind> {
 
        while let Some(token_kind) = self.next_including_comments() {
 
            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
 
                return Some(token_kind);
 
            }
 
            self.consume();
 
        }
 

	
 
        return None
 
    }
 

	
 
    /// Returns the start position belonging to the token returned by `next`. If
 
    /// there is not a next token, then we return the end position of the
 
    /// previous token.
 
    pub(crate) fn last_valid_pos(&self) -> InputPosition {
 
        if self.cur < self.end {
 
            // Return token position
 
            return self.tokens[self.cur].pos
 
        }
 

	
 
        // Return previous token end
 
        let token = &self.tokens[self.cur - 1];
 
        return if token.kind == TokenKind::SpanEnd {
 
            token.pos
 
        } else {
 
            token.pos.with_offset(token.kind.num_characters())
 
        };
 
    }
 

	
 
    /// Returns the token range belonging to the token returned by `next`. This
 
    /// assumes that we're not at the end of the range we're iterating over.
 
    pub(crate) fn next_range(&self) -> (InputPosition, InputPosition) {
 
        debug_assert!(self.cur < self.end);
 
        let token = &self.tokens[self.cur];
 
        if token.kind.has_span_end() {
 
            let span_end = &self.tokens[self.cur + 1];
 
            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
 
            (token.pos, span_end.pos)
 
        } else {
 
            let offset = token.kind.num_characters();
 
            (token.pos, token.pos.with_offset(offset))
 
        }
 
    }
 

	
 
    pub(crate) fn consume(&mut self) {
 
        if let Some(kind) = self.next() {
 
            if kind.has_span_end() {
 
                self.cur += 2;
 
            } else {
 
                self.cur += 1;
 
            }
 
        }
 
    }
 
}
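
// Minimal iteration sketch (assumes a populated `TokenBuffer` and one of its
// ranges): `next()` peeks at the current non-comment token without advancing,
// `next_range()` yields its positions, and `consume()` advances past the
// token (and its `SpanEnd`, if it has one).
//
//     let mut iter = buffer.iter_range(range);
//     while let Some(kind) = iter.next() {
//         let (start, end) = iter.next_range();
//         // ... inspect source.section(start, end) based on `kind` ...
//         iter.consume();
//     }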
 

	
 
/// Tokenizer is a reusable parser to tokenize multiple source files using the
 
/// same allocated buffers. In a well-formed program, we produce a consistent
 
/// tree of token ranges such that we may identify tokens that represent a
 
/// definition or an import before producing the entire AST.
 
///
 
/// If the program is not well-formed then the tree may be inconsistent, but we
 
/// will detect this once we transform the tokens into the AST. To ensure a
 
/// consistent AST-producing phase we will require the import to have balanced
 
/// curly braces
 
pub(crate) struct Tokenizer {
 
    // Signed because programmer might have placed too many closing curly braces
 
    curly_depth: i32,
 
    // Stack of input positions of opening curly braces, used to detect
 
    // unmatched opening braces, unmatched closing braces are detected
 
    // immediately.
 
    curly_stack: Vec<InputPosition>,
 
    // Points to an element in the `TokenBuffer.ranges` variable.
 
    stack_idx: usize,
 
}
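
// Range-tree sketch for a small module (positions elided, field syntax
// illustrative): each top-level construct becomes a subrange of the single
// Module range, and tokens between them are collected into Code ranges.
//
//     #module example              -> Pragma range
//     import other.module;         -> Import range
//     struct Foo { /* fields */ }  -> Definition range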
 

	
 
impl Tokenizer {
 
    pub(crate) fn new() -> Self {
 
        Self{ curly_depth: 0, stack_idx: 0 }
 
        Self{ curly_depth: 0, curly_stack: Vec::with_capacity(32), stack_idx: 0 }
 
    }
 
    pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        // Assert source and buffer are at start
 
@@ -150,7 +284,7 @@ impl Tokenizer {
 

	
 
        // Main tokenization loop
 
        while let Some(c) = source.next() {
 
            let token_index = target.tokens.len();
 
            let token_index = target.tokens.len() as u32;
 

	
 
            if is_char_literal_start(c) {
 
                self.consume_char_literal(source, target)?;
 
@@ -180,28 +314,37 @@ impl Tokenizer {
 
                if contained_newline {
 
                    let range = &target.ranges[self.stack_idx];
 
                    if range.range_kind == TokenRangeKind::Pragma {
 
                        self.pop_range(target, target.tokens.len());
 
                        self.pop_range(target, target.tokens.len() as u32);
 
                    }
 
                }
 
            } else {
 
                let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
 
                if let Some(token) = was_punctuation {
 
                if let Some((token, token_pos)) = was_punctuation {
 
                    if token == TokenKind::OpenCurly {
 
                        self.curly_depth += 1;
 
                        self.curly_stack.push(token_pos);
 
                    } else if token == TokenKind::CloseCurly {
 
                        // Check if this marks the end of a range we're 
 
                        // currently processing
 
                        self.curly_depth -= 1;
 
                        if self.curly_stack.is_empty() {
 
                        return Err(ParseError::new_error_str_at_pos(
 
                                source, token_pos, "unmatched closing curly brace '}'"
 
                            ));
 
                        }
 

	
 
                        self.curly_stack.pop();
 

	
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_depth {
 
                            self.pop_range(target, target.tokens.len());
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 

	
 
 
                    } else if token == TokenKind::SemiColon {
 
                        // Check if this marks the end of an import
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Import {
 
                            self.pop_range(target, target.tokens.len());
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 
                    }
 
                } else {
 
@@ -215,6 +358,15 @@ impl Tokenizer {
 
            return Err(error);
 
        }
 

	
 
        if !self.curly_stack.is_empty() {
 
            // Let's not add a lot of heuristics and just tell the programmer
 
            // that something is wrong
 
            let last_unmatched_open = self.curly_stack.pop().unwrap();
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, last_unmatched_open, "unmatched opening curly brace '{'"
 
            ));
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
@@ -226,7 +378,9 @@ impl Tokenizer {
 
        return first_char == b'/' && Some(b'*') == source.lookahead(1);
 
    }
 

	
 
    fn maybe_parse_punctuation(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<Option<TokenKind>, ParseError> {
 
    fn maybe_parse_punctuation(
 
        &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
 
    ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
 
        debug_assert!(first_char != b'#', "'#' needs special handling");
 
        debug_assert!(first_char != b'\'', "'\'' needs special handling");
 
        debug_assert!(first_char != b'"', "'\"' needs special handling");
 
@@ -412,7 +566,7 @@ impl Tokenizer {
 
        }
 

	
 
        target.tokens.push(Token::new(token_kind, pos));
 
        Ok(Some(token_kind))
 
        Ok(Some((token_kind, pos)))
 
    }
 

	
 
    fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
@@ -610,7 +764,7 @@ impl Tokenizer {
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
        Ok(source.section(begin_pos.offset, end_pos.offset))
 
        Ok(source.section(begin_pos, end_pos))
 
    }
 

	
 
    fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
@@ -656,23 +810,18 @@ impl Tokenizer {
 
    }
 

	
 
    /// Pushes a new token range onto the stack in the buffers.
 
    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token: usize) {
 
    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token: u32) {
 
        let cur_range = &mut target.ranges[self.stack_idx];
 

	
 
        println!(
 
            "DEBUG: push_range [1] | stack_idx: {}, range_end: {}, first_token: {}", 
 
            self.stack_idx, cur_range.end, first_token
 
        );
 

	
 
        // If we have just popped a range and then push a new range, then the
 
        // first token is equal to the last token registered on the current 
 
        // range. If not, then we had some intermediate tokens that did not 
 
        // belong to a particular kind of token range: hence we insert an 
 
        // intermediate "code" range.
 
        if cur_range.end != first_token {
 
            println!("DEBUG: push_range [2] | inserting code range");
 
            let code_start = cur_range.end;
 
            cur_range.end = first_token;
 
            debug_assert_ne!(code_start, first_token);
 
            cur_range.subranges += 1;
 
            target.ranges.push(TokenRange{
 
                parent_idx: self.stack_idx,
 
@@ -685,10 +834,6 @@ impl Tokenizer {
 
        }
 

	
 
        // Insert a new range
 
        println!(
 
            "DEBUG: push_range [3] | kind: {:?}, parent_idx: {}, stack_idx: {}", 
 
            range_kind, self.stack_idx, target.ranges.len()
 
        );
 
        let parent_idx = self.stack_idx;
 
        self.stack_idx = target.ranges.len();
 
        target.ranges.push(TokenRange{
 
@@ -701,26 +846,19 @@ impl Tokenizer {
 
        });
 
    }
 

	
 
    fn pop_range(&mut self, target: &mut TokenBuffer, end_index: usize) {
 
    fn pop_range(&mut self, target: &mut TokenBuffer, end_index: u32) {
 
        let last = &mut target.ranges[self.stack_idx];
 
        debug_assert!(self.stack_idx != last.parent_idx, "attempting to pop top-level range");
 

	
 
        // Fix up the current range before going back to parent
 
        println!(
 
            "DEBUG: pop_range  [1] | stack_idx: {}, kind: {:?}, start: {}, old_end: {}, new_end: {}",
 
            self.stack_idx, last.range_kind, last.start, last.end, end_index
 
        );
 
        last.end = end_index;
 
        debug_assert_ne!(last.start, end_index);
 
        
 
        // Go back to parent
 
        self.stack_idx = last.parent_idx;
 
        let parent = &mut target.ranges[self.stack_idx];
 
        parent.end = end_index;
 
        parent.subranges += 1;
 
        println!(
 
            "DEBUG: pop_range  [2] | returning to kind: {:?}, idx: {}, new_end: {}",
 
            parent.range_kind, self.stack_idx, end_index
 
        );
 
    }
 

	
 

	
 
@@ -739,16 +877,16 @@ impl Tokenizer {
 
// Helpers for characters
 
fn demarks_definition(ident: &[u8]) -> bool {
 
    return
 
        ident == b"struct" ||
 
        ident == b"enum" ||
 
        ident == b"union" ||
 
        ident == b"func" ||
 
        ident == b"primitive" ||
 
        ident == b"composite"
 
        ident == KW_STRUCT ||
 
        ident == KW_ENUM ||
 
        ident == KW_UNION ||
 
        ident == KW_FUNCTION ||
 
        ident == KW_PRIMITIVE ||
 
        ident == KW_COMPOSITE
 
}
 

	
 
fn demarks_import(ident: &[u8]) -> bool {
 
    return ident == b"import";
 
    return ident == KW_IMPORT;
 
}
 

	
 
fn is_whitespace(c: u8) -> bool {
 
@@ -856,7 +994,7 @@ mod tests {
 
                    let (_, end) = iter.next().unwrap();
 
                    println!("[{}] {:?} ......", idx, token.kind);
 
                    assert_eq!(end.kind, TokenKind::SpanEnd);
 
                    let text = source.section(token.pos.offset, end.pos.offset);
 
                    let text = source.section(token.pos, end.pos);
 
                    println!("{}", String::from_utf8_lossy(text));
 
                },
 
                _ => {