diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs
index ccf6982e268efdeaf5a532c9644a6803fd9d148a..2c1de3259841299875c763f62d3ab2655d576ea0 100644
--- a/src/protocol/parser/tokens.rs
+++ b/src/protocol/parser/tokens.rs
@@ -12,6 +12,7 @@ pub enum TokenKind {
     Ident, // regular identifier
     Pragma, // identifier with prefixed `#`, range includes `#`
     Integer, // integer literal
+    Bytestring, // string literal, interpreted as byte array, range includes 'b"'
     String, // string literal, range includes `"`
     Character, // character literal, range includes `'`
     LineComment, // line comment, range includes leading `//`, but not newline
@@ -78,7 +79,7 @@ pub enum TokenKind {
 impl TokenKind {
     /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
     /// the case for tokens of variable length (e.g. an identifier).
-    fn has_span_end(&self) -> bool {
+    pub(crate) fn has_span_end(&self) -> bool {
         return *self <= TokenKind::BlockComment
     }
 
@@ -152,7 +153,8 @@ impl TokenKind {
             TK::ShiftLeftEquals => "<<=",
             TK::ShiftRightEquals => ">>=",
             // Lets keep these in explicitly for now, in case we want to add more symbols
-            TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
+            TK::Ident | TK::Pragma | TK::Integer |
+            TK::Bytestring | TK::String | TK::Character |
             TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
         }
     }
@@ -170,64 +172,48 @@ impl Token {
     }
 }
 
-/// The kind of token ranges that are specially parsed by the tokenizer.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum TokenRangeKind {
-    Module,
+#[derive(Debug, Clone, Copy)]
+pub enum TokenMarkerKind {
     Pragma,
     Import,
     Definition,
-    Code,
 }
 
-pub const NO_RELATION: i32 = -1;
-pub const NO_SIBLING: i32 = NO_RELATION;
-
-/// A range of tokens with a specific meaning. Such a range is part of a tree
-/// where each parent tree envelops all of its children.
+/// A marker for a specific token. These are stored separately from the array of
+/// tokens. These are used for initial symbol, module name, and import
+/// discovery.
 #[derive(Debug)]
-pub struct TokenRange {
-    // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
-    // range kind is Module, in that case the parent index is -1.
-    pub parent_idx: i32,
-    pub range_kind: TokenRangeKind,
+pub struct TokenMarker {
+    pub kind: TokenMarkerKind,
     pub curly_depth: u32,
-    // Offsets into `TokenBuffer.ranges`: the tokens belonging to this range.
-    pub start: u32, // first token (inclusive index)
-    pub end: u32, // last token (exclusive index)
-    // Child ranges
-    pub num_child_ranges: u32, // Number of subranges
-    pub first_child_idx: i32, // First subrange (or -1 if no subranges)
-    pub last_child_idx: i32, // Last subrange (or -1 if no subranges)
-    pub next_sibling_idx: i32, // Next subrange (or -1 if no next subrange)
+    // Indices into token buffer. The first token is inclusive and set upon
+    // tokenization, the last token is set at a later stage in parsing (e.g.
+    // at symbol discovery we may parse some of the `Pragma` tokens and set the
+    // last parsed token)
+    pub first_token: u32,
+    pub last_token: u32,
+    pub handled: bool,
 }
 
 pub struct TokenBuffer {
     pub tokens: Vec<Token>,
-    pub ranges: Vec<TokenRange>,
+    pub markers: Vec<TokenMarker>,
 }
 
 impl TokenBuffer {
     pub(crate) fn new() -> Self {
-        Self{ tokens: Vec::new(), ranges: Vec::new() }
-    }
-
-    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
-        TokenIter::new(self, range.start as usize, range.end as usize)
-    }
-
-    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
-        self.tokens[range.start as usize].pos
+        return Self{
+            tokens: Vec::new(),
+            markers: Vec::new(),
+        };
     }
 
-    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
-        let last_token = &self.tokens[range.end as usize - 1];
-        if last_token.kind == TokenKind::SpanEnd {
-            return last_token.pos
-        } else {
-            debug_assert!(!last_token.kind.has_span_end());
-            return last_token.pos.with_offset(last_token.kind.num_characters());
-        }
+    pub(crate) fn iter_range(
+        &self, inclusive_start: u32, exclusive_end: Option<u32>
+    ) -> TokenIter {
+        let exclusive_end = exclusive_end.unwrap_or(self.tokens.len() as u32) as usize;
+        debug_assert!(exclusive_end <= self.tokens.len());
+        TokenIter::new(self, inclusive_start as usize, exclusive_end)
     }
 }
 
@@ -337,6 +323,10 @@ impl<'a> TokenIter<'a> {
         }
     }
 
+    pub(crate) fn token_index(&self) -> u32 {
+        return self.cur as u32;
+    }
+
     /// Saves the current iteration position, may be passed to `load` to return
     /// the iterator to a previous position.
     pub(crate) fn save(&self) -> (usize, usize) {