diff --git a/src/collections/string_pool.rs b/src/collections/string_pool.rs
index 5cd82b91d55b8ebff11b98bd504745ceb5d1616d..30cb8a5c6451ca390b91ed9c5a3d93bf989a2406 100644
--- a/src/collections/string_pool.rs
+++ b/src/collections/string_pool.rs
@@ -1,7 +1,10 @@
 use std::ptr::null_mut;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
 
 const SLAB_SIZE: usize = u16::max_value() as usize;
 
+#[derive(Clone)]
 pub struct StringRef {
     data: *const u8,
     length: usize,
@@ -16,6 +19,22 @@ impl StringRef {
     }
 }
 
+impl PartialEq for StringRef {
+    fn eq(&self, other: &StringRef) -> bool {
+        self.as_str() == other.as_str()
+    }
+}
+
+impl Eq for StringRef {}
+
+// Hashing the raw bytes is consistent with `PartialEq` above: `as_str` views
+// the exact same byte range.
+impl Hash for StringRef {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        unsafe {
+            state.write(std::slice::from_raw_parts(self.data, self.length));
+        }
+    }
+}
+
 struct StringPoolSlab {
     prev: *mut StringPoolSlab,
     data: Vec<u8>,
@@ -60,7 +79,7 @@ impl StringPool {
             last = unsafe{&mut *self.last};
         }
 
-        // Must fit now
+        // Must fit now, compute hash and put in buffer
         debug_assert!(data_len <= last.remaining);
         let range_start = last.data.len();
         last.data.extend_from_slice(data);
diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs
index 579e804fe917139cf8717191e9ab5f1b4ebf89b8..2d42e0c68ffae8f1eb38fba278842d7de83d7a54 100644
--- a/src/protocol/ast.rs
+++ b/src/protocol/ast.rs
@@ -6,7 +6,9 @@ use std::fmt::{Debug, Display, Formatter};
 use std::ops::{Index, IndexMut};
 
 use super::arena::{Arena, Id};
+use crate::collections::StringRef;
 use crate::protocol::inputsource::*;
+use crate::protocol::input_source2::{InputPosition2, InputSpan};
 
 /// Global limits to the AST, should be checked by lexer and parser. Some are
 /// arbitrary
@@ -238,7 +240,7 @@ impl Index<RootId> for Heap {
 pub struct Root {
     pub this: RootId,
     // Phase 1: parser
-    pub position: InputPosition,
+    // pub position: InputPosition,
     pub pragmas: Vec<PragmaId>,
     pub imports: Vec<ImportId>,
     pub definitions: Vec<DefinitionId>,
@@ -264,14 +266,23 @@ impl SyntaxElement for Root {
 #[derive(Debug, Clone)]
 pub enum Pragma {
     Version(PragmaVersion),
-    Module(PragmaModule)
+    Module(PragmaModule),
+}
+
+impl Pragma {
+    pub(crate) fn as_module(&self) -> &PragmaModule {
+        match self {
+            Pragma::Module(pragma) => pragma,
+            _ => unreachable!("Tried to obtain {:?} as PragmaModule", self),
+        }
+    }
 }
 
 #[derive(Debug, Clone)]
 pub struct PragmaVersion {
     pub this: PragmaId,
     // Phase 1: parser
-    pub position: InputPosition,
+    pub span: InputSpan, // of full pragma
     pub version: u64,
 }
 
@@ -279,22 +290,8 @@ pub struct PragmaVersion {
 pub struct PragmaModule {
     pub this: PragmaId,
     // Phase 1: parser
-    pub position: InputPosition,
-    pub value: Vec<u8>,
-}
-
-#[derive(Debug, Clone)]
-pub struct PragmaOld {
-    pub this: PragmaId,
-    // Phase 1: parser
-    pub position: InputPosition,
-    pub value: Vec<u8>,
-}
-
-impl SyntaxElement for PragmaOld {
-    fn position(&self) -> InputPosition {
-        self.position
-    }
+    pub span: InputSpan, // of full pragma
+    pub value: Identifier,
 }
 
 #[derive(Debug, Clone)]
@@ -365,8 +362,8 @@ pub struct ImportSymbols {
 #[derive(Debug, Clone)]
 pub struct Identifier {
-    pub position: InputPosition,
-    pub value: Vec<u8>
+    pub span: InputSpan,
+    pub value: StringRef,
 }
 
 impl PartialEq for Identifier {
diff --git a/src/protocol/input_source2.rs b/src/protocol/input_source2.rs
index 54888d0e7e255ce861097b47abbf4503a7c3ae2e..f2be5f9812a61dc4a6485c25c288b354cdcd47ca 100644
--- a/src/protocol/input_source2.rs
+++ b/src/protocol/input_source2.rs
@@ -1,5 +1,6 @@
 use std::fmt;
 use std::cell::{Ref, RefCell};
+use std::fmt::Write;
 
 #[derive(Debug, Clone, Copy)]
 pub struct InputPosition2 {
@@ -7,6 +8,13 @@ pub struct InputPosition2 {
     pub offset: u32,
 }
 
+impl InputPosition2 {
+    pub(crate) fn with_offset(&self, offset: u32) -> Self {
+        InputPosition2{ line: self.line, offset: self.offset + offset }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
 pub struct InputSpan {
     pub begin: InputPosition2,
     pub end: InputPosition2,
@@ -14,7 +22,7 @@ pub struct InputSpan {
 impl InputSpan {
     #[inline]
-    fn from_positions(begin: InputPosition2, end: InputPosition2) -> Self {
+    pub fn from_positions(begin: InputPosition2, end: InputPosition2) -> Self {
         Self { begin, end }
     }
 }
@@ -75,8 +83,8 @@ impl InputSource2 {
         }
     }
 
-    pub fn section(&self, start: u32, end: u32) -> &[u8] {
-        &self.input[start as usize..end as usize]
+    pub fn section(&self, start: InputPosition2, end: InputPosition2) -> &[u8] {
+        &self.input[start.offset as usize..end.offset as usize]
     }
 
     // Consumes the next character. Will check well-formedness of newlines: \r
@@ -145,11 +153,14 @@ impl InputSource2 {
         return lookup;
     }
 
+    /// Retrieves offset at which line starts (right after newline)
     fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
         let lookup = self.get_lookup();
         lookup[line_number as usize]
     }
 
+    /// Retrieves offset at which line ends (at the newline character or the
+    /// preceding carriage return for \r\n-encoded newlines)
    fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
         let lookup = self.get_lookup();
         let offset = lookup[(line_number + 1) as usize] - 1;
@@ -169,79 +180,186 @@ impl InputSource2 {
 }
 
 #[derive(Debug)]
-pub enum ParseErrorType {
+pub enum StatementKind {
     Info,
     Error
 }
 
+#[derive(Debug)]
+pub enum ContextKind {
+    SingleLine,
+    MultiLine,
+}
+
 #[derive(Debug)]
 pub struct ParseErrorStatement {
-    pub(crate) error_type: ParseErrorType,
-    pub(crate) line: u32,
-    pub(crate) column: u32,
-    pub(crate) offset: u32,
+    pub(crate) statement_kind: StatementKind,
+    pub(crate) context_kind: ContextKind,
+    pub(crate) start_line: u32,
+    pub(crate) start_column: u32,
+    pub(crate) end_line: u32,
+    pub(crate) end_column: u32,
     pub(crate) filename: String,
     pub(crate) context: String,
     pub(crate) message: String,
 }
 
 impl ParseErrorStatement {
-    fn from_source(error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
+    fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self {
         // Seek line start and end
         let line_start = source.lookup_line_start_offset(position.line);
         let line_end = source.lookup_line_end_offset(position.line);
+        let context = Self::create_context(source, line_start as usize, line_end as usize);
         debug_assert!(position.offset >= line_start);
         let column = position.offset - line_start + 1;
 
         Self{
-            error_type,
-            line: position.line,
-            column,
-            offset: position.offset,
+            statement_kind,
+            context_kind: ContextKind::SingleLine,
+            start_line: position.line,
+            start_column: column,
+            end_line: position.line,
+            end_column: column + 1,
+            filename: source.filename.clone(),
+            context,
+            message,
+        }
+    }
+
+    fn from_source_at_span(statement_kind: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self {
+        debug_assert!(span.end.line >= span.begin.line);
+        debug_assert!(span.end.offset >= span.begin.offset);
+
+        let first_line_start = source.lookup_line_start_offset(span.begin.line);
+        let last_line_start = source.lookup_line_start_offset(span.end.line);
+        let last_line_end = source.lookup_line_end_offset(span.end.line);
+        let context = Self::create_context(source,
+            first_line_start as usize, last_line_end as usize);
+        debug_assert!(span.begin.offset >= first_line_start);
+        let start_column = span.begin.offset - first_line_start + 1;
+        let end_column = span.end.offset - last_line_start + 1;
+
+        let context_kind = if span.begin.line == span.end.line {
+            ContextKind::SingleLine
+        } else {
+            ContextKind::MultiLine
+        };
+
+        Self{
+            statement_kind,
+            context_kind,
+            start_line: span.begin.line,
+            start_column,
+            end_line: span.end.line,
+            end_column,
             filename: source.filename.clone(),
-            context: String::from_utf8_lossy(&source.input[line_start as usize..line_end as usize]).to_string(),
-            message: msg.to_string()
+            context,
+            message,
         }
     }
+
+    /// Produces context from source
+    fn create_context(source: &InputSource2, start: usize, end: usize) -> String {
+        let context_raw = &source.input[start..end];
+        String::from_utf8_lossy(context_raw).to_string()
+    }
 }
 
 impl fmt::Display for ParseErrorStatement {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        // Write message
-        match self.error_type {
-            ParseErrorType::Info => write!(f, " INFO: ")?,
-            ParseErrorType::Error => write!(f, "ERROR: ")?,
+        // Write kind of statement and message
+        match self.statement_kind {
+            StatementKind::Info => f.write_str(" INFO: ")?,
+            StatementKind::Error => f.write_str("ERROR: ")?,
         }
-        writeln!(f, "{}", &self.message)?;
+        f.write_str(&self.message)?;
+        f.write_char('\n')?;
 
         // Write originating file/line/column
-        if self.filename.is_empty() {
-            writeln!(f, " +- at {}:{}", self.line, self.column)?;
-        } else {
-            writeln!(f, " +- at {}:{}:{}", self.filename, self.line, self.column)?;
+        f.write_str(" +- ")?;
+        if !self.filename.is_empty() {
+            write!(f, "in {} ", self.filename)?;
+        }
+
+        match self.context_kind {
+            ContextKind::SingleLine => writeln!(f, "at {}:{}", self.start_line, self.start_column),
+            ContextKind::MultiLine => writeln!(
+                f, "from {}:{} to {}:{}",
+                self.start_line, self.start_column, self.end_line, self.end_column
+            )
+        }?;
+
+        // Helper function for writing context: converting tabs into 4 spaces
+        // (oh, the controversy!) and creating an annotated line
+        fn transform_context(source: &str, target: &mut String) {
+            for char in source.chars() {
+                if char == '\t' {
+                    target.push_str("    ");
+                } else {
+                    target.push(char);
+                }
+            }
+        }
+
+        // Extends `target` with `extend_char` for the columns [first_col, last_col)
+        fn extend_annotation(first_col: u32, last_col: u32, source: &str, target: &mut String, extend_char: char) {
+            debug_assert!(first_col > 0 && last_col >= first_col);
+            for (char_idx, char) in source.chars().enumerate().skip(first_col as usize - 1) {
+                if char_idx == last_col as usize - 1 {
+                    break;
+                }
+
+                if char == '\t' {
+                    for _ in 0..4 { target.push(extend_char); }
+                } else {
+                    target.push(extend_char);
+                }
+            }
+        }
 
         // Write source context
         writeln!(f, " | ")?;
-        writeln!(f, " | {}", self.context)?;
-
-        // Write underline indicating where the error ocurred
-        debug_assert!(self.column as usize <= self.context.chars().count());
-        let mut arrow = String::with_capacity(self.context.len() + 3);
-        arrow.push_str(" | ");
-        let mut char_col = 1;
-        for char in self.context.chars() {
-            if char_col == self.column { break; }
-            if char == '\t' {
-                arrow.push('\t');
-            } else {
-                arrow.push(' ');
-            }
-            char_col += 1;
+        let mut context = String::with_capacity(128);
+        let mut annotation = String::with_capacity(128);
+
+        match self.context_kind {
+            ContextKind::SingleLine => {
+                // Write single line of context with indicator for the offending
+                // span underneath.
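+                // The rendered result then looks roughly like this
+                // (illustrative, for a span covering `bar`):
+                //
+                //  |
+                //  | let x = bar + 5;
+                //  |         ~~~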
+                transform_context(&self.context, &mut context);
+                context.push('\n');
+                f.write_str(" | ")?;
+                f.write_str(&context)?;
+
+                annotation.push_str(" | ");
+                extend_annotation(1, self.start_column, &self.context, &mut annotation, ' ');
+                extend_annotation(self.start_column, self.end_column, &self.context, &mut annotation, '~');
+                annotation.push('\n');
+
+                f.write_str(&annotation)?;
+            },
+            ContextKind::MultiLine => {
+                // Annotate all offending lines
+                // - first line
+                let mut lines = self.context.lines();
+                let first_line = lines.next().unwrap();
+                transform_context(first_line, &mut context);
+                writeln!(f, " |- {}", &context)?;
+
+                // - remaining lines
+                let mut last_line = first_line;
+                while let Some(cur_line) = lines.next() {
+                    context.clear();
+                    transform_context(cur_line, &mut context);
+                    writeln!(f, " |  {}", &context)?;
+                    last_line = cur_line;
+                }
+
+                // - underline beneath last line
+                annotation.push_str(" \\__");
+                extend_annotation(1, self.end_column, last_line, &mut annotation, '_');
+                annotation.push_str("/\n");
+                f.write_str(&annotation)?;
+            }
         }
-        arrow.push('^');
-        writeln!(f, "{}", arrow)?;
 
         Ok(())
     }
@@ -273,21 +391,53 @@ impl ParseError {
         Self{ statements: Vec::new() }
     }
 
-    pub fn new_error(source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
-        Self{ statements: vec!(ParseErrorStatement::from_source(ParseErrorType::Error, source, position, msg))}
+    pub fn new_error_at_pos(source: &InputSource2, position: InputPosition2, message: String) -> Self {
+        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
+            StatementKind::Error, source, position, message
+        )) }
+    }
+
+    pub fn new_error_str_at_pos(source: &InputSource2, position: InputPosition2, message: &str) -> Self {
+        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
+            StatementKind::Error, source, position, message.to_string()
+        )) }
+    }
+
+    pub fn new_error_at_span(source: &InputSource2, span: InputSpan, message: String) -> Self {
+        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
+            StatementKind::Error, source, span, message
+        )) }
+    }
+
+    pub fn new_error_str_at_span(source: &InputSource2, span: InputSpan, message: &str) -> Self {
+        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
+            StatementKind::Error, source, span, message.to_string()
+        )) }
     }
 
-    pub fn with_prefixed(mut self, error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
-        self.statements.insert(0, ParseErrorStatement::from_source(error_type, source, position, msg));
+    pub fn with_at_pos(mut self, error_type: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self {
+        self.statements.push(ParseErrorStatement::from_source_at_pos(error_type, source, position, message));
         self
     }
 
-    pub fn with_postfixed(mut self, error_type: ParseErrorType, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
-        self.statements.push(ParseErrorStatement::from_source(error_type, source, position, msg));
+    pub fn with_at_span(mut self, error_type: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self {
+        self.statements.push(ParseErrorStatement::from_source_at_span(error_type, source, span, message));
         self
     }
 
-    pub fn with_postfixed_info(self, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
-        self.with_postfixed(ParseErrorType::Info, source, position, msg)
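+    /// Appends an info statement to the error; info statements render after
+    /// the primary error. A typical chain looks like (sketch, mirroring the
+    /// lexer's module-name-conflict error):
+    ///
+    /// ```ignore
+    /// ParseError::new_error_str_at_span(source, span, "conflict in module name")
+    ///     .with_info_str_at_span(other_source, other_span, "other module is defined here")
+    /// ```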
+    pub fn with_info_at_pos(self, source: &InputSource2, position: InputPosition2, msg: String) -> Self {
+        self.with_at_pos(StatementKind::Info, source, position, msg)
+    }
+
+    pub fn with_info_str_at_pos(self, source: &InputSource2, position: InputPosition2, msg: &str) -> Self {
+        self.with_at_pos(StatementKind::Info, source, position, msg.to_string())
+    }
+
+    pub fn with_info_at_span(self, source: &InputSource2, span: InputSpan, msg: String) -> Self {
+        self.with_at_span(StatementKind::Info, source, span, msg)
+    }
+
+    pub fn with_info_str_at_span(self, source: &InputSource2, span: InputSpan, msg: &str) -> Self {
+        self.with_at_span(StatementKind::Info, source, span, msg.to_string())
+    }
 }
diff --git a/src/protocol/lexer2.rs b/src/protocol/lexer2.rs
index 50e7ff55cb147ab4c7668d0576badd1e67713cc2..1cbb06f354f4a7b846c6992ac58ba0cd033f80a6 100644
--- a/src/protocol/lexer2.rs
+++ b/src/protocol/lexer2.rs
@@ -1,24 +1,418 @@
+use crate::protocol::ast::*;
 use crate::protocol::Heap;
-use crate::protocol::tokenizer::{TokenBuffer, Token};
-use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError};
+use crate::collections::{StringPool, StringRef};
+use crate::protocol::tokenizer::*;
+use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as InputPosition, InputSpan, ParseError};
+use crate::protocol::parser::symbol_table2::*;
+
+#[derive(Debug, PartialEq, Eq)]
+enum ModuleCompilationPhase {
+    Source,             // only source is set
+    Tokenized,          // source is tokenized
+    DefinitionsScanned, // all definitions are linked to their type class
+    ImportsResolved,    // all imports are added to the symbol table
+    Parsed,             // produced the AST for the module
+    ValidatedAndLinked, // AST is traversed and has linked the required AST nodes
+    Typed,              // Type inference and checking has been performed
+}
+
+enum KeywordDefinition {
+    Struct,
+    Enum,
+    Union,
+    Function,
+    Primitive,
+    Composite,
+}
+
+impl KeywordDefinition {
+    fn as_symbol_class(&self) -> SymbolClass {
+        use KeywordDefinition as KD;
+        use SymbolClass as SC;
+
+        match self {
+            KD::Struct => SC::Struct,
+            KD::Enum => SC::Enum,
+            KD::Union => SC::Union,
+            KD::Function => SC::Function,
+            KD::Primitive | KD::Composite => SC::Component,
+        }
+    }
+}
+
+struct Module {
+    // Buffers
+    source: InputSource,
+    tokens: TokenBuffer,
+    // Identifiers
+    root_id: RootId,
+    name: Option<(PragmaId, StringRef)>,
+    version: Option<(PragmaId, u64)>,
+    phase: ModuleCompilationPhase,
+}
 
 struct Ctx<'a> {
     heap: &'a mut Heap,
-    source: &'a InputSource,
-    tokens: &'a TokenBuffer,
+    symbols: &'a mut SymbolTable,
+    pool: &'a mut StringPool,
+}
+
+/// Scans the module and finds all module-level type definitions. These will be
+/// added to the symbol table such that during AST-construction we know which
+/// identifiers point to types. Will also parse all pragmas to determine module
+/// names.
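+///
+/// A minimal usage sketch (hypothetical driver code; assumes every `Module`
+/// has already been tokenized and that `ctx` wraps the heap, symbol table and
+/// string pool as above):
+///
+/// ```ignore
+/// let mut pre_pass = ASTSymbolPrePass::new();
+/// for module_idx in 0..modules.len() {
+///     pre_pass.parse(&mut modules, module_idx, &mut ctx)?;
+/// }
+/// ```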
+pub(crate) struct ASTSymbolPrePass {
+    symbols: Vec<Symbol>,
+    pragmas: Vec<PragmaId>,
+    buffer: String,
+    has_pragma_version: bool,
+    has_pragma_module: bool,
+}
+
+impl ASTSymbolPrePass {
+    pub(crate) fn new() -> Self {
+        Self{
+            symbols: Vec::with_capacity(128),
+            pragmas: Vec::with_capacity(8),
+            buffer: String::with_capacity(128),
+            has_pragma_version: false,
+            has_pragma_module: false,
+        }
+    }
+
+    fn reset(&mut self) {
+        self.symbols.clear();
+        self.pragmas.clear();
+        self.has_pragma_version = false;
+        self.has_pragma_module = false;
+    }
+
+    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> {
+        self.reset();
+
+        let module = &mut modules[module_idx];
+        let module_range = &module.tokens.ranges[0];
+        let expected_parent_idx = 0;
+        let expected_subranges = module_range.subranges;
+        debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
+        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
+        debug_assert_eq!(module.root_id.index, 0);
+
+        // Preallocate root in the heap
+        let root_id = ctx.heap.alloc_protocol_description(|this| {
+            Root{
+                this,
+                pragmas: Vec::new(),
+                imports: Vec::new(),
+                definitions: Vec::new(),
+            }
+        });
+        module.root_id = root_id;
+
+        // Visit token ranges to detect definitions
+        let mut visited_subranges = 0;
+        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
+            // Skip any ranges that do not belong to the module. Re-borrow
+            // through `modules` so the pre-pass calls below may take the
+            // full slice mutably.
+            let cur_range = &modules[module_idx].tokens.ranges[range_idx];
+            if cur_range.parent_idx != expected_parent_idx {
+                continue;
+            }
+
+            // Parse if it is a definition or a pragma
+            if cur_range.range_kind == TokenRangeKind::Definition {
+                self.visit_definition_range(modules, module_idx, ctx, range_idx)?;
+            } else if cur_range.range_kind == TokenRangeKind::Pragma {
+                self.visit_pragma_range(modules, module_idx, ctx, range_idx)?;
+            }
+
+            visited_subranges += 1;
+            if visited_subranges == expected_subranges {
+                break;
+            }
+        }
+
+        // By now all symbols should have been found: add to symbol table and
+        // add the parsed pragmas to the preallocated root in the heap.
+        debug_assert_eq!(visited_subranges, expected_subranges);
+        ctx.symbols.insert_scoped_symbols(None, SymbolScope::Module(root_id), &self.symbols)?;
+
+        let root = &mut ctx.heap[root_id];
+        debug_assert!(root.pragmas.is_empty());
+        root.pragmas.extend(&self.pragmas);
+
+        modules[module_idx].phase = ModuleCompilationPhase::DefinitionsScanned;
+
+        Ok(())
+    }
+
+    fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
+        let module = &modules[module_idx];
+        let range = &module.tokens.ranges[range_idx];
+        let mut iter = module.tokens.iter_range(range);
+
+        // Consume pragma name
+        let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
+
+        // Consume pragma values
+        if pragma_section == b"#module" {
+            // Check if name is defined twice within the same file
+            if self.has_pragma_module {
+                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
+            }
+
+            // Consume the domain-name
+            let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
+            if iter.next().is_some() {
+                return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
+            }
+
+            // Add to heap and symbol table
+            let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
+            let module_name = ctx.pool.intern(module_name);
+            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
+                this,
+                span: pragma_span,
+                value: Identifier{ span: module_span, value: module_name.clone() },
+            }));
+            self.pragmas.push(pragma_id);
+
+            if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name.clone(), module.root_id) {
+                // Naming conflict
+                let this_module = &modules[module_idx];
+                let other_module = seek_module(modules, other_module_root_id).unwrap();
+                let (other_module_pragma_id, _) = other_module.name.unwrap();
+                let other_pragma = ctx.heap[other_module_pragma_id].as_module();
+                return Err(ParseError::new_error_str_at_span(
+                    &this_module.source, pragma_span, "conflict in module name"
+                ).with_info_str_at_span(
+                    &other_module.source, other_pragma.span, "other module is defined here"
+                ));
+            }
+            // Remember the pragma: the conflict branch above relies on
+            // `Module::name` being set for previously parsed modules.
+            modules[module_idx].name = Some((pragma_id, module_name));
+            self.has_pragma_module = true;
+        } else if pragma_section == b"#version" {
+            // Check if version is defined twice within the same file
+            if self.has_pragma_version {
+                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice"));
+            }
+
+            // Consume the version pragma
+            let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
+            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
+                this,
+                span: InputSpan::from_positions(pragma_start, version_span.end),
+                version,
+            }));
+            self.pragmas.push(pragma_id);
+            modules[module_idx].version = Some((pragma_id, version));
+            self.has_pragma_version = true;
+        } else {
+            // Custom pragma, maybe we support this in the future, but for now
+            // we don't.
+            return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name"));
+        }
+
+        Ok(())
+    }
+
+    fn visit_definition_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize) -> Result<(), ParseError> {
+        let module = &modules[module_idx];
+        let range = &module.tokens.ranges[range_idx];
+        let definition_span = InputSpan::from_positions(
+            module.tokens.start_pos(range),
+            module.tokens.end_pos(range)
+        );
+        let mut iter = module.tokens.iter_range(range);
+
+        // Because we're visiting a definition, we expect an ident that resolves
+        // to a keyword indicating a definition.
+        let kw_text = consume_ident_text(&module.source, &mut iter).unwrap();
+        let kw = parse_definition_keyword(kw_text).unwrap();
+
+        // Retrieve identifier and put in temp symbol table
+        let definition_ident = consume_ident_text(&module.source, &mut iter)?;
+        let definition_ident = ctx.pool.intern(definition_ident);
+        let symbol_class = kw.as_symbol_class();
+
+        // Get the token indicating the end of the definition to get the full
+        // span of the definition
+        let last_token = &module.tokens.tokens[range.end as usize - 1];
+        debug_assert_eq!(last_token.kind, TokenKind::CloseCurly);
+
+        self.symbols.push(Symbol::new(
+            module.root_id,
+            SymbolScope::Module(module.root_id),
+            definition_span,
+            symbol_class,
+            definition_ident
+        ));
+
+        Ok(())
+    }
+}
+
+pub(crate) struct ASTImportPrePass {
+}
+
+impl ASTImportPrePass {
+    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx) -> Result<(), ParseError> {
+        let module = &modules[module_idx];
+        let module_range = &module.tokens.ranges[0];
+        debug_assert_eq!(module.phase, ModuleCompilationPhase::DefinitionsScanned);
+        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
+
+        let expected_parent_idx = 0;
+        let expected_subranges = module_range.subranges;
+        let mut visited_subranges = 0;
+
+        for range_idx in expected_parent_idx + 1..module.tokens.ranges.len() {
+            // Re-borrow through `modules`, as in `ASTSymbolPrePass::parse`
+            let cur_range = &modules[module_idx].tokens.ranges[range_idx];
+            if cur_range.parent_idx != expected_parent_idx {
+                continue;
+            }
+
+            visited_subranges += 1;
+            if cur_range.range_kind == TokenRangeKind::Import {
+                self.visit_import_range(modules, module_idx, ctx, range_idx)?;
+            }
+
+            if visited_subranges == expected_subranges {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+
+    pub(crate) fn visit_import_range(
+        &mut self, modules: &mut [Module], module_idx: usize, ctx: &mut Ctx, range_idx: usize
+    ) -> Result<(), ParseError> {
+        let module = &modules[module_idx];
+        let import_range = &module.tokens.ranges[range_idx];
+        debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
+
+        let mut iter = module.tokens.iter_range(import_range);
+
+        // Consume "import"
+        let _import_ident = consume_ident_text(&module.source, &mut iter)?;
+        debug_assert_eq!(_import_ident, KW_IMPORT);
+
+        // Consume module name
+        let (module_name, _) = consume_domain_ident(&module.source, &mut iter)?;
+
+        Ok(())
+    }
+}
-
-// Lexes definitions. Should be the first pass over each of the module files
-// after tokenization. Only once all definitions are parsed can we do the full
-// AST creation pass.
-struct LexerDefinitions {
+fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> {
+    let (_, name_start, mut name_end) = consume_ident(source, iter)?;
+    while let Some(TokenKind::Dot) = iter.next() {
+        consume_dot(source, iter)?;
+        let (_, _, new_end) = consume_ident(source, iter)?;
+        name_end = new_end;
+    }
+    Ok((source.section(name_start, name_end), InputSpan::from_positions(name_start, name_end)))
+}
+
+fn consume_dot(source: &InputSource, iter: &mut TokenIter) -> Result<(), ParseError> {
+    if Some(TokenKind::Dot) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a dot"));
+    }
+    iter.consume();
+    Ok(())
 }
 
-impl LexerDefinitions {
-    pub(crate) fn parse(ctx: &mut Ctx) -> Result<(), ParseError> {
-        debug_assert!(ctx.tokens.ranges.len() > 0);
+/// Consumes an integer literal token. Accepts decimal (`255`), binary
+/// (`0b1111_1111`), octal (`0o377`) and hexadecimal (`0xFF`) notation, with
+/// optional `_` separators.
+fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
+    if Some(TokenKind::Integer) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
     }
+    let (start_pos, end_pos) = iter.next_range();
+    iter.consume();
 
-    pub(crate) fn parse_definition(heap: &mut Heap, source: &InputSource, range: &TokenRang)
+    let integer_text = source.section(start_pos, end_pos);
+
+    // Determine radix and offset from prefix
+    let (radix, input_offset, radix_name) =
+        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
+            // Binary number
+            (2, 2, "binary")
+        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
+            // Octal number
+            (8, 2, "octal")
+        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
+            // Hexadecimal number
+            (16, 2, "hexadecimal")
+        } else {
+            (10, 0, "decimal")
+        };
+
+    // Take out any of the separating '_' characters
+    buffer.clear();
+    for char_idx in input_offset..integer_text.len() {
+        let char = integer_text[char_idx];
+        if char == b'_' {
+            continue;
+        }
+        // Let alphanumerics through so hexadecimal digits survive;
+        // `from_str_radix` below rejects digits invalid for the radix.
+        if !char.is_ascii_alphanumeric() {
+            return Err(ParseError::new_error_at_pos(
+                source, start_pos, format!("incorrectly formatted {} number", radix_name)
+            ));
+        }
+        buffer.push(char::from(char));
+    }
+
+    // Use the cleaned up string to convert to integer
+    match u64::from_str_radix(&buffer, radix) {
+        Ok(number) => Ok((number, InputSpan::from_positions(start_pos, end_pos))),
+        Err(_) => Err(
+            ParseError::new_error_at_pos(source, start_pos, format!("incorrectly formatted {} number", radix_name))
+        ),
+    }
+}
+
+fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
+    for module in modules {
+        if module.root_id == root_id {
+            return Some(module)
+        }
+    }
+
+    return None
+}
+
+fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
+    if Some(TokenKind::Pragma) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
+    }
+    let (pragma_start, pragma_end) = iter.next_range();
+    iter.consume();
+    Ok((source.section(pragma_start, pragma_end), pragma_start, pragma_end))
+}
+
+fn consume_ident_text<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<&'a [u8], ParseError> {
+    if Some(TokenKind::Ident) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
+    }
+    let (ident_start, ident_end) = iter.next_range();
+    iter.consume();
+    Ok(source.section(ident_start, ident_end))
+}
+
+fn consume_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
+    if Some(TokenKind::Ident) != iter.next() {
+        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
+    }
+    let (ident_start, ident_end) = iter.next_range();
+    iter.consume();
+    Ok((source.section(ident_start, ident_end), ident_start, ident_end))
+}
+
+fn parse_definition_keyword(keyword: &[u8]) -> Option<KeywordDefinition> {
+    match keyword {
+        KW_STRUCT => Some(KeywordDefinition::Struct),
+        KW_ENUM => Some(KeywordDefinition::Enum),
+        KW_UNION => Some(KeywordDefinition::Union),
+        KW_FUNCTION => Some(KeywordDefinition::Function),
+        KW_PRIMITIVE => Some(KeywordDefinition::Primitive),
+        KW_COMPOSITE => Some(KeywordDefinition::Composite),
+        _ => None
+    }
+}
\ No newline at end of file
diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs
index 2ecd781606c3c13ddfd54b1bd62533da5564bc26..0110d950b8d80c1db45d1a5286816baf75a23042 100644
--- a/src/protocol/parser/mod.rs
+++ b/src/protocol/parser/mod.rs
@@ -1,5 +1,6 @@
 mod depth_visitor;
 pub(crate) mod symbol_table;
+pub(crate) mod symbol_table2;
 pub(crate) mod type_table;
 mod type_resolver;
 mod visitor;
diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs
new file mode 100644
index 0000000000000000000000000000000000000000..fecb9d079df26a592365855742a6eecb355f8620
--- /dev/null
+++ b/src/protocol/parser/symbol_table2.rs
@@ -0,0 +1,107 @@
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+
+use crate::protocol::input_source2::*;
+use crate::protocol::ast::*;
+use crate::collections::*;
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SymbolScope {
+    Module(RootId),
+    Definition(DefinitionId),
+}
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum SymbolClass {
+    Module,
+    Struct,
+    Enum,
+    Union,
+    Function,
+    Component
+}
+
+struct ScopedSymbols {
+    scope: SymbolScope,
+    parent_scope: Option<SymbolScope>,
+    child_scopes: Vec<SymbolScope>,
+    start: usize,
+    end: usize,
+}
+
+#[derive(Clone)]
+pub struct Symbol {
+    // Definition location
+    pub defined_in_module: RootId,
+    pub defined_in_scope: SymbolScope,
+    pub definition_span: InputSpan, // full span of definition
+    // Introduction location (if imported instead of defined)
+
+    // Symbol properties
+    pub class: SymbolClass,
+    pub name: StringRef,
+    pub definition: Option<DefinitionId>,
+}
+
+impl Symbol {
+    pub(crate) fn new(root_id: RootId, scope: SymbolScope, span: InputSpan, class: SymbolClass, name: StringRef) -> Self {
+        Self{
+            defined_in_module: root_id,
+            defined_in_scope: scope,
+            definition_span: span,
+            class,
+            name,
+            definition: None,
+        }
+    }
+}
+
+pub struct SymbolTable {
+    module_lookup: HashMap<StringRef, RootId>,
+    scope_lookup: HashMap<SymbolScope, ScopedSymbols>,
+    symbols: Vec<Symbol>,
+}
+
+impl SymbolTable {
+    /// Inserts a new module by its name. Upon module naming conflict the
+    /// previously associated `RootId` will be returned.
+    pub(crate) fn insert_module(&mut self, module_name: StringRef, root_id: RootId) -> Result<(), RootId> {
+        match self.module_lookup.entry(module_name) {
+            Entry::Occupied(v) => {
+                Err(*v.get())
+            },
+            Entry::Vacant(v) => {
+                v.insert(root_id);
+                Ok(())
+            }
+        }
+    }
+
+    /// Inserts a new scope with defined symbols. The `parent_scope` must
+    /// already be added to the symbol table. The symbols are expected to come
+    /// from a temporary buffer and are copied inside the symbol table. Will
+    /// return an error if there is a naming conflict.
+    pub(crate) fn insert_scoped_symbols(
+        &mut self, parent_scope: Option<SymbolScope>, within_scope: SymbolScope, symbols: &[Symbol]
+    ) -> Result<(), ParseError> {
+        // Add scoped symbols
+        let old_num_symbols = self.symbols.len();
+
+        let new_scope = ScopedSymbols {
+            scope: within_scope,
+            parent_scope,
+            child_scopes: Vec::new(),
+            start: old_num_symbols,
+            end: old_num_symbols + symbols.len(),
+        };
+
+        self.symbols.extend_from_slice(symbols);
+        self.scope_lookup.insert(within_scope, new_scope);
+
+        if let Some(parent_scope) = parent_scope.as_ref() {
+            let parent = self.scope_lookup.get_mut(parent_scope).unwrap();
+            parent.child_scopes.push(within_scope);
+        }
+
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/src/protocol/tokenizer/mod.rs b/src/protocol/tokenizer/mod.rs
index fe6105ad9dae355136099a86932474e7a287bed0..ebd4ee5555bd52674797aebbdc998b35c0d3e5d9 100644
--- a/src/protocol/tokenizer/mod.rs
+++ b/src/protocol/tokenizer/mod.rs
@@ -1,7 +1,19 @@
-
-use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError, InputPosition2 as InputPosition, InputSpan};
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+use crate::protocol::input_source2::{
+    InputSource2 as InputSource,
+    ParseError,
+    InputPosition2 as InputPosition,
+    InputSpan
+};
+
+pub(crate) const KW_STRUCT: &'static [u8] = b"struct";
+pub(crate) const KW_ENUM: &'static [u8] = b"enum";
+pub(crate) const KW_UNION: &'static [u8] = b"union";
+pub(crate) const KW_FUNCTION: &'static [u8] = b"func";
+pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
+pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
+pub(crate) const KW_IMPORT: &'static [u8] = b"import";
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub(crate) enum TokenKind {
     // Variable-character tokens, followed by a SpanEnd token
     Ident,          // regular identifier
@@ -11,7 +23,7 @@ pub(crate) enum TokenKind {
     Character,      // character literal, range includes `'`
     LineComment,    // line comment, range includes leading `//`, but not newline
     BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
-    // Punctuation
+    // Punctuation (single character)
     Exclamation,    // !
     Question,       // ?
     Pound,          // #
@@ -24,48 +36,68 @@ pub(crate) enum TokenKind {
     CloseParen,     // )
     CloseSquare,    // ]
     Colon,          // :
-    ColonColon,     // ::
     Comma,          // ,
     Dot,            // .
-    DotDot,         // ..
     SemiColon,      // ;
     Quote,          // '
     DoubleQuote,    // "
-    // Operator-like
+    // Operator-like (single character)
     At,             // @
     Plus,           // +
-    PlusPlus,       // ++
-    PlusEquals,     // +=
     Minus,          // -
+    Star,           // *
+    Slash,          // /
+    Percent,        // %
+    Caret,          // ^
+    And,            // &
+    Or,             // |
+    Tilde,          // ~
+    Equal,          // =
+    // Punctuation (two characters)
+    ColonColon,     // ::
+    DotDot,         // ..
     ArrowRight,     // ->
+    // Operator-like (two characters)
+    PlusPlus,       // ++
+    PlusEquals,     // +=
     MinusMinus,     // --
     MinusEquals,    // -=
-    Star,           // *
     StarEquals,     // *=
-    Slash,          // /
     SlashEquals,    // /=
-    Percent,        // %
     PercentEquals,  // %=
-    Caret,          // ^
     CaretEquals,    // ^=
-    And,            // &
     AndAnd,         // &&
     AndEquals,      // &=
-    Or,             // |
     OrOr,           // ||
     OrEquals,       // |=
-    Tilde,          // ~
-    Equal,          // =
     EqualEqual,     // ==
     NotEqual,       // !=
     ShiftLeft,      // <<
-    ShiftLeftEquals,// <<=
     ShiftRight,     // >>
+    // Operator-like (three characters)
+    ShiftLeftEquals,// <<=
     ShiftRightEquals, // >>=
     // Special marker token to indicate end of variable-character tokens
     SpanEnd,
 }
 
+impl TokenKind {
+    fn has_span_end(&self) -> bool {
+        return *self <= TokenKind::BlockComment
+    }
+
+    fn num_characters(&self) -> u32 {
+        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
+        if *self <= TokenKind::Equal {
+            1
+        } else if *self <= TokenKind::ShiftRight {
+            2
+        } else {
+            3
+        }
+    }
+}
+
 pub(crate) struct Token {
     pub kind: TokenKind,
     pub pos: InputPosition,
@@ -86,16 +118,18 @@ pub(crate) enum TokenRangeKind {
     Code,
 }
 
+/// TODO: Add first_child and next_sibling indices for slightly faster traversal
 #[derive(Debug)]
-struct TokenRange {
+pub(crate) struct TokenRange {
     // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
     // range kind is Module, in that case the parent index points to itself.
-    parent_idx: usize,
-    range_kind: TokenRangeKind,
-    curly_depth: i32,
-    start: usize,
-    end: usize,
-    subranges: usize,
+    pub parent_idx: usize,
+    pub range_kind: TokenRangeKind,
+    pub curly_depth: u32,
+    // InputPosition offset is limited to u32, so token ranges can be as well.
+    pub start: u32,
+    pub end: u32,
+    pub subranges: u32,
 }
 
 pub(crate) struct TokenBuffer {
@@ -107,26 +141,126 @@ impl TokenBuffer {
     pub(crate) fn new() -> Self {
         Self{ tokens: Vec::new(), ranges: Vec::new() }
     }
+
+    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
+        TokenIter::new(self, range.start as usize, range.end as usize)
+    }
+
+    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
+        self.tokens[range.start as usize].pos
+    }
+
+    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
+        let last_token = &self.tokens[range.end as usize - 1];
+        if last_token.kind == TokenKind::SpanEnd {
+            return last_token.pos
+        } else {
+            debug_assert!(!last_token.kind.has_span_end());
+            return last_token.pos.with_offset(last_token.kind.num_characters());
+        }
+    }
+}
+
+pub(crate) struct TokenIter<'a> {
+    tokens: &'a Vec<Token>,
+    cur: usize,
+    end: usize,
+}
 
-// Tokenizer is a reusable parser to tokenize multiple source files using the
-// same allocated buffers. In a well-formed program, we produce a consistent
-// tree of token ranges such that we may identify tokens that represent a
-// defintion or an import before producing the entire AST.
-//
-// If the program is not well-formed then the tree may be inconsistent, but we
-// will detect this once we transform the tokens into the AST. Maybe we want to
-// detect a mismatch in opening/closing curly braces in the future?
+impl<'a> TokenIter<'a> {
+    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
+        Self{ tokens: &buffer.tokens, cur: start, end }
+    }
+
+    /// Returns the next token (may include comments), or `None` if at the end
+    /// of the range.
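+    ///
+    /// Note that `next` and `next_including_comments` only peek at the token
+    /// kind; `consume` advances the iterator. A typical scanning loop is
+    /// therefore (sketch):
+    ///
+    /// ```ignore
+    /// while let Some(kind) = iter.next() {
+    ///     let (begin, end) = iter.next_range();
+    ///     // ... handle token `kind` spanning `begin..end` ...
+    ///     iter.consume();
+    /// }
+    /// ```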
+    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
+        if self.cur >= self.end {
+            return None;
+        }
+
+        let token = &self.tokens[self.cur];
+        Some(token.kind)
+    }
+
+    /// Returns the next token (but skips over comments), or `None` if at the
+    /// end of the range
+    pub(crate) fn next(&mut self) -> Option<TokenKind> {
+        while let Some(token_kind) = self.next_including_comments() {
+            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
+                return Some(token_kind);
+            }
+            self.consume();
+        }
+
+        return None
+    }
+
+    /// Returns the start position belonging to the token returned by `next`. If
+    /// there is not a next token, then we return the end position of the
+    /// previous token.
+    pub(crate) fn last_valid_pos(&self) -> InputPosition {
+        if self.cur < self.end {
+            // Return token position
+            return self.tokens[self.cur].pos
+        }
+
+        // Return previous token end
+        let token = &self.tokens[self.cur - 1];
+        return if token.kind == TokenKind::SpanEnd {
+            token.pos
+        } else {
+            token.pos.with_offset(token.kind.num_characters())
+        };
+    }
+
+    /// Returns the token range belonging to the token returned by `next`. This
+    /// assumes that we're not at the end of the range we're iterating over.
+    pub(crate) fn next_range(&self) -> (InputPosition, InputPosition) {
+        debug_assert!(self.cur < self.end);
+        let token = &self.tokens[self.cur];
+        if token.kind.has_span_end() {
+            let span_end = &self.tokens[self.cur + 1];
+            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
+            (token.pos, span_end.pos)
+        } else {
+            let offset = token.kind.num_characters();
+            (token.pos, token.pos.with_offset(offset))
+        }
+    }
+
+    pub(crate) fn consume(&mut self) {
+        // Peek with `next_including_comments` (not `next`, which would itself
+        // call `consume` to skip comments and hence recurse)
+        if let Some(kind) = self.next_including_comments() {
+            if kind.has_span_end() {
+                self.cur += 2;
+            } else {
+                self.cur += 1;
+            }
+        }
+    }
+}
+
+/// Tokenizer is a reusable parser to tokenize multiple source files using the
+/// same allocated buffers. In a well-formed program, we produce a consistent
+/// tree of token ranges such that we may identify tokens that represent a
+/// definition or an import before producing the entire AST.
+///
+/// If the program is not well-formed then the tree may be inconsistent, but we
+/// will detect this once we transform the tokens into the AST. To ensure a
+/// consistent AST-producing phase we will require the input to have balanced
+/// curly braces.
 pub(crate) struct Tokenizer {
-    // Signed because programmer might have placed too many closing curly braces
-    curly_depth: i32,
+    // Stack of input positions of opening curly braces, used to detect
+    // unmatched opening braces; unmatched closing braces are detected
+    // immediately.
+    curly_stack: Vec<InputPosition>,
    // Points to an element in the `TokenBuffer.ranges` variable.
     stack_idx: usize,
 }
 
 impl Tokenizer {
     pub(crate) fn new() -> Self {
-        Self{ curly_depth: 0, stack_idx: 0 }
+        Self{ curly_stack: Vec::with_capacity(32), stack_idx: 0 }
     }
 
     pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
         // Assert source and buffer are at start
@@ -150,7 +284,7 @@ impl Tokenizer {
 
         // Main tokenization loop
         while let Some(c) = source.next() {
-            let token_index = target.tokens.len();
+            let token_index = target.tokens.len() as u32;
 
             if is_char_literal_start(c) {
                 self.consume_char_literal(source, target)?;
@@ -180,28 +314,37 @@ impl Tokenizer {
                 if contained_newline {
                     let range = &target.ranges[self.stack_idx];
                     if range.range_kind == TokenRangeKind::Pragma {
-                        self.pop_range(target, target.tokens.len());
+                        self.pop_range(target, target.tokens.len() as u32);
                     }
                 }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
-                if let Some(token) = was_punctuation {
+                if let Some((token, token_pos)) = was_punctuation {
                     if token == TokenKind::OpenCurly {
-                        self.curly_depth += 1;
+                        self.curly_stack.push(token_pos);
                     } else if token == TokenKind::CloseCurly {
                         // Check if this marks the end of a range we're
                        // currently processing
-                        self.curly_depth -= 1;
+                        if self.curly_stack.is_empty() {
+                            return Err(ParseError::new_error_str_at_pos(
+                                source, token_pos, "unmatched closing curly brace '}'"
+                            ));
+                        }
+
+                        self.curly_stack.pop();
                         let range = &target.ranges[self.stack_idx];
-                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_depth {
-                            self.pop_range(target, target.tokens.len());
+                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
+                            self.pop_range(target, target.tokens.len() as u32);
                         }
                     } else if token == TokenKind::SemiColon {
                         // Check if this marks the end of an import
                         let range = &target.ranges[self.stack_idx];
                         if range.range_kind == TokenRangeKind::Import {
-                            self.pop_range(target, target.tokens.len());
+                            self.pop_range(target, target.tokens.len() as u32);
                         }
                     }
                 } else {
@@ -215,6 +358,15 @@ impl Tokenizer {
             return Err(error);
         }
 
+        if !self.curly_stack.is_empty() {
+            // Let's not add a lot of heuristics and just tell the programmer
+            // that something is wrong
+            let last_unmatched_open = self.curly_stack.pop().unwrap();
+            return Err(ParseError::new_error_str_at_pos(
+                source, last_unmatched_open, "unmatched opening curly brace '{'"
+            ));
+        }
+
         Ok(())
     }
 
@@ -226,7 +378,9 @@ impl Tokenizer {
         return first_char == b'/' && Some(b'*') == source.lookahead(1);
     }
 
-    fn maybe_parse_punctuation(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<Option<TokenKind>, ParseError> {
+    fn maybe_parse_punctuation(
+        &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
+    ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
         debug_assert!(first_char != b'#', "'#' needs special handling");
         debug_assert!(first_char != b'\'', "'\'' needs special handling");
         debug_assert!(first_char != b'"', "'\"' needs special handling");
@@ -412,7 +566,7 @@ impl Tokenizer {
         }
         target.tokens.push(Token::new(token_kind, pos));
 
-        Ok(Some(token_kind))
+        Ok(Some((token_kind, pos)))
     }
 
     fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
@@ -610,7 +764,7 @@ impl Tokenizer {
         let end_pos = source.pos();
         target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
         target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
-        Ok(source.section(begin_pos.offset, end_pos.offset))
+        Ok(source.section(begin_pos, end_pos))
     }
 
    fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
@@ -656,23 +810,18 @@
     }
 
     /// Pushes a new token range onto the stack in the buffers.
-    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token: usize) {
+    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token: u32) {
         let cur_range = &mut target.ranges[self.stack_idx];
 
-        println!(
-            "DEBUG: push_range [1] | stack_idx: {}, range_end: {}, first_token: {}",
-            self.stack_idx, cur_range.end, first_token
-        );
-
         // If we have just popped a range and then push a new range, then the
         // first token is equal to the last token registered on the current
         // range. If not, then we had some intermediate tokens that did not
         // belong to a particular kind of token range: hence we insert an
         // intermediate "code" range.
         if cur_range.end != first_token {
-            println!("DEBUG: push_range [2] | inserting code range");
             let code_start = cur_range.end;
             cur_range.end = first_token;
+            debug_assert_ne!(code_start, first_token);
             cur_range.subranges += 1;
             target.ranges.push(TokenRange{
                 parent_idx: self.stack_idx,
@@ -685,10 +834,6 @@
         }
 
         // Insert a new range
-        println!(
-            "DEBUG: push_range [3] | kind: {:?}, parent_idx: {}, stack_idx: {}",
-            range_kind, self.stack_idx, target.ranges.len()
-        );
         let parent_idx = self.stack_idx;
         self.stack_idx = target.ranges.len();
         target.ranges.push(TokenRange{
@@ -701,26 +846,19 @@
         });
     }
 
-    fn pop_range(&mut self, target: &mut TokenBuffer, end_index: usize) {
+    fn pop_range(&mut self, target: &mut TokenBuffer, end_index: u32) {
         let last = &mut target.ranges[self.stack_idx];
         debug_assert!(self.stack_idx != last.parent_idx, "attempting to pop top-level range");
 
         // Fix up the current range before going back to parent
-        println!(
-            "DEBUG: pop_range [1] | stack_idx: {}, kind: {:?}, start: {}, old_end: {}, new_end: {}",
-            self.stack_idx, last.range_kind, last.start, last.end, end_index
-        );
         last.end = end_index;
+        debug_assert_ne!(last.start, end_index);
 
         // Go back to parent
         self.stack_idx = last.parent_idx;
         let parent = &mut target.ranges[self.stack_idx];
         parent.end = end_index;
         parent.subranges += 1;
-        println!(
-            "DEBUG: pop_range [2] | returning to kind: {:?}, idx: {}, new_end: {}",
-            parent.range_kind, self.stack_idx, end_index
-        );
     }
 
@@ -739,16 +877,16 @@
 // Helpers for characters
 fn demarks_definition(ident: &[u8]) -> bool {
     return
-        ident == b"struct" ||
-        ident == b"enum" ||
-        ident == b"union" ||
-        ident == b"func" ||
-        ident == b"primitive" ||
-        ident == b"composite"
+        ident == KW_STRUCT ||
+        ident == KW_ENUM ||
+        ident == KW_UNION ||
+        ident == KW_FUNCTION ||
+        ident == KW_PRIMITIVE ||
+        ident == KW_COMPOSITE
 }
 
 fn demarks_import(ident: &[u8]) -> bool {
-    return ident == b"import";
+    return ident == KW_IMPORT;
 }
 
 fn is_whitespace(c: u8) -> bool {
@@ -856,7 +994,7 @@ mod tests {
                     let (_, end) = iter.next().unwrap();
                     println!("[{}] {:?} ......", idx, token.kind);
                     assert_eq!(end.kind, TokenKind::SpanEnd);
-                    let text = source.section(token.pos.offset, end.pos.offset);
+                    let text = source.section(token.pos, end.pos);
                    println!("{}", String::from_utf8_lossy(text));
                 },
                 _ => {