use crate::protocol::input_source::{ InputPosition as InputPosition, InputSpan }; /// Represents a particular kind of token. Some tokens represent /// variable-character tokens. Such a token is always followed by a /// `TokenKind::SpanEnd` token. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum TokenKind { // Variable-character tokens, followed by a SpanEnd token Ident, // regular identifier Pragma, // identifier with prefixed `#`, range includes `#` Integer, // integer literal Bytestring, // string literal, interpreted as byte array, range includes 'b"' String, // string literal, range includes `"` Character, // character literal, range includes `'` LineComment, // line comment, range includes leading `//`, but not newline BlockComment, // block comment, range includes leading `/*` and trailing `*/` // Punctuation (single character) Exclamation, // ! Question, // ? Pound, // # OpenAngle, // < OpenCurly, // { OpenParen, // ( OpenSquare, // [ CloseAngle, // > CloseCurly, // } CloseParen, // ) CloseSquare, // ] Colon, // : Comma, // , Dot, // . SemiColon, // ; // Operator-like (single character) At, // @ Plus, // + Minus, // - Star, // * Slash, // / Percent, // % Caret, // ^ And, // & Or, // | Tilde, // ~ Equal, // = // Punctuation (two characters) ColonColon, // :: DotDot, // .. ArrowRight, // -> // Operator-like (two characters) AtEquals, // @= PlusPlus, // ++ PlusEquals, // += MinusMinus, // -- MinusEquals, // -= StarEquals, // *= SlashEquals, // /= PercentEquals, // %= CaretEquals, // ^= AndAnd, // && AndEquals, // &= OrOr, // || OrEquals, // |= EqualEqual, // == NotEqual, // != ShiftLeft, // << LessEquals, // <= ShiftRight, // >> GreaterEquals, // >= // Operator-like (three characters) ShiftLeftEquals,// <<= ShiftRightEquals, // >>= // Special marker token to indicate end of variable-character tokens SpanEnd, } impl TokenKind { /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is /// the case for tokens of variable length (e.g. an identifier). pub(crate) fn has_span_end(&self) -> bool { return *self <= TokenKind::BlockComment } /// Returns the number of characters associated with the token. May only be called on tokens /// that do not have a variable length. fn num_characters(&self) -> u32 { debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); if *self <= TokenKind::Equal { 1 } else if *self <= TokenKind::GreaterEquals { 2 } else { 3 } } /// Returns the characters that are represented by the token, may only be called on tokens that /// do not have a variable length. pub fn token_chars(&self) -> &'static str { debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); use TokenKind as TK; match self { TK::Exclamation => "!", TK::Question => "?", TK::Pound => "#", TK::OpenAngle => "<", TK::OpenCurly => "{", TK::OpenParen => "(", TK::OpenSquare => "[", TK::CloseAngle => ">", TK::CloseCurly => "}", TK::CloseParen => ")", TK::CloseSquare => "]", TK::Colon => ":", TK::Comma => ",", TK::Dot => ".", TK::SemiColon => ";", TK::At => "@", TK::Plus => "+", TK::Minus => "-", TK::Star => "*", TK::Slash => "/", TK::Percent => "%", TK::Caret => "^", TK::And => "&", TK::Or => "|", TK::Tilde => "~", TK::Equal => "=", TK::ColonColon => "::", TK::DotDot => "..", TK::ArrowRight => "->", TK::AtEquals => "@=", TK::PlusPlus => "++", TK::PlusEquals => "+=", TK::MinusMinus => "--", TK::MinusEquals => "-=", TK::StarEquals => "*=", TK::SlashEquals => "/=", TK::PercentEquals => "%=", TK::CaretEquals => "^=", TK::AndAnd => "&&", TK::AndEquals => "&=", TK::OrOr => "||", TK::OrEquals => "|=", TK::EqualEqual => "==", TK::NotEqual => "!=", TK::ShiftLeft => "<<", TK::LessEquals => "<=", TK::ShiftRight => ">>", TK::GreaterEquals => ">=", TK::ShiftLeftEquals => "<<=", TK::ShiftRightEquals => ">>=", // Lets keep these in explicitly for now, in case we want to add more symbols TK::Ident | TK::Pragma | TK::Integer | TK::Bytestring | TK::String | TK::Character | TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(), } } } /// Represents a single token at a particular position. pub struct Token { pub kind: TokenKind, pub pos: InputPosition, } impl Token { pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self { Self{ kind, pos } } } #[derive(Debug, Clone, Copy)] pub enum TokenMarkerKind { Pragma, Import, Definition, } /// A marker for a specific token. These are stored separately from the array of /// tokens. These are used for initial symbol, module name, and import /// discovery. #[derive(Debug)] pub struct TokenMarker { pub kind: TokenMarkerKind, pub curly_depth: u32, // Indices into token buffer. The first token is inclusive and set upon // tokenization, the last token is set at a later stage in parsing (e.g. // at symbol discovery we may parse some of the `Pragma` tokens and set the // last parsed token) pub first_token: u32, pub last_token: u32, pub handled: bool, } pub struct TokenBuffer { pub tokens: Vec, pub markers: Vec, } impl TokenBuffer { pub(crate) fn new() -> Self { return Self{ tokens: Vec::new(), markers: Vec::new(), }; } pub(crate) fn iter_range( &self, inclusive_start: u32, exclusive_end: Option ) -> TokenIter { let exclusive_end = exclusive_end.unwrap_or(self.tokens.len() as u32) as usize; debug_assert!(exclusive_end <= self.tokens.len()); TokenIter::new(self, inclusive_start as usize, exclusive_end) } } /// Iterator over tokens within a specific `TokenRange`. pub(crate) struct TokenIter<'a> { tokens: &'a Vec, cur: usize, end: usize, } impl<'a> TokenIter<'a> { fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self { Self{ tokens: &buffer.tokens, cur: start, end } } /// Returns the next token (may include comments), or `None` if at the end /// of the range. pub(crate) fn next_including_comments(&self) -> Option { if self.cur >= self.end { return None; } let token = &self.tokens[self.cur]; Some(token.kind) } /// Returns the next token (but skips over comments), or `None` if at the /// end of the range pub(crate) fn next(&mut self) -> Option { while let Some(token_kind) = self.next_including_comments() { if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment { return Some(token_kind); } self.consume(); } return None } /// Peeks ahead by one token (i.e. the one that comes after `next()`), and /// skips over comments pub(crate) fn peek(&self) -> Option { for next_idx in self.cur + 1..self.end { let next_kind = self.tokens[next_idx].kind; if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd { return Some(next_kind); } } return None; } /// Returns the start position belonging to the token returned by `next`. If /// there is not a next token, then we return the end position of the /// previous token. pub(crate) fn last_valid_pos(&self) -> InputPosition { if self.cur < self.end { // Return token position return self.tokens[self.cur].pos } // Return previous token end let token = &self.tokens[self.cur - 1]; return if token.kind == TokenKind::SpanEnd { token.pos } else { token.pos.with_offset(token.kind.num_characters()) }; } /// Assumes the token is not at the end and returns the starting position /// belonging to the token returned by `next`. pub(crate) fn next_start_position(&self) -> InputPosition { debug_assert!(self.cur < self.end); return self.tokens[self.cur].pos; } /// Returns the token range belonging to the token returned by `next`. This /// assumes that we're not at the end of the range we're iterating over. pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) { debug_assert!(self.cur < self.end); let token = &self.tokens[self.cur]; if token.kind.has_span_end() { let span_end = &self.tokens[self.cur + 1]; debug_assert_eq!(span_end.kind, TokenKind::SpanEnd); (token.pos, span_end.pos) } else { let offset = token.kind.num_characters(); (token.pos, token.pos.with_offset(offset)) } } /// See `next_positions` pub(crate) fn next_span(&self) -> InputSpan { let (begin, end) = self.next_positions(); return InputSpan::from_positions(begin, end) } /// Advances the iterator to the next (meaningful) token. pub(crate) fn consume(&mut self) { if let Some(kind) = self.next_including_comments() { if kind.has_span_end() { self.cur += 2; } else { self.cur += 1; } } } pub(crate) fn token_index(&self) -> u32 { return self.cur as u32; } /// Saves the current iteration position, may be passed to `load` to return /// the iterator to a previous position. pub(crate) fn save(&self) -> (usize, usize) { (self.cur, self.end) } pub(crate) fn load(&mut self, saved: (usize, usize)) { self.cur = saved.0; self.end = saved.1; } }