Changeset - fc2d65a1b906
[Not reviewed]
src/collections/scoped_buffer.rs
Show inline comments
 
use std::iter::FromIterator;
 

	
 
/// scoped_buffer.rs
 
///
 
/// Solves the common pattern where we are performing some kind of recursive
 
/// pattern while using a temporary buffer. At the start, or during the
 
/// procedure, we push stuff into the buffer. At the end we take out what we
 
/// have put in.
 
///
 
/// It is unsafe because we're using pointers to circumvent borrowing rules in
 
/// the name of code cleanliness. The correctness of use is checked in debug
 
/// mode.
 

	
 
use std::iter::FromIterator;
 

	
 
pub(crate) struct ScopedBuffer<T: Sized> {
 
    pub inner: Vec<T>,
 
}
 

	
 
/// A section of the buffer. Keeps track of where we started the section. When
 
/// done with the section one must call `into_vec` or `forget` to remove the
 
/// section from the underlying buffer.
 
/// section from the underlying buffer. This will also be done upon dropping the
 
/// ScopedSection in case errors are being handled.
 
pub(crate) struct ScopedSection<T: Sized> {
 
    inner: *mut Vec<T>,
 
    start_size: u32,
 
    #[cfg(debug_assertions)] cur_size: u32,
 
}
 

	
 
impl<T: Sized> ScopedBuffer<T> {
 
    pub(crate) fn new_reserved(capacity: usize) -> Self {
 
        Self { inner: Vec::with_capacity(capacity) }
 
    }
 

	
 
    pub(crate) fn start_section(&mut self) -> ScopedSection<T> {
 
        let start_size = self.inner.len() as u32;
 
        ScopedSection {
 
            inner: &mut self.inner,
 
            start_size,
 
            cur_size: start_size
 
        }
 
    }
 
}
 

	
 
impl<T: Clone> ScopedBuffer<T> {
 
    pub(crate) fn start_section_initialized(&mut self, initialize_with: &[T]) -> ScopedSection<T> {
 
        let start_size = self.inner.len() as u32;
 
        let data_size = initialize_with.len() as u32;
 
        self.inner.extend_from_slice(initialize_with);
 
        ScopedSection{
 
            inner: &mut self.inner,
 
            start_size,
 
            cur_size: start_size + data_size,
 
        }
 
    }
 
}
 

	
 
#[cfg(debug_assertions)]
 
impl<T: Sized> Drop for ScopedBuffer<T> {
 
    fn drop(&mut self) {
 
        // Make sure that everyone cleaned up the buffer neatly
 
        debug_assert!(self.inner.is_empty(), "dropped non-empty scoped buffer");
 
    }
 
}
 

	
 
impl<T: Sized> ScopedSection<T> {
 
    #[inline]
 
    pub(crate) fn push(&mut self, value: T) {
 
        let vec = unsafe{&mut *self.inner};
 
        debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to push onto section, but size is larger than expected");
 
        vec.push(value);
 
        if cfg!(debug_assertions) { self.cur_size += 1; }
 
    }
 

	
 
    pub(crate) fn len(&self) -> usize {
 
        let vec = unsafe{&mut *self.inner};
 
        debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to get section length, but size is larger than expected");
 
        return vec.len() - self.start_size as usize;
 
    }
 

	
 
    #[inline]
 
    pub(crate) fn forget(self) {
 
    pub(crate) fn forget(mut self) {
 
        let vec = unsafe{&mut *self.inner};
 
        debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to forget section, but size is larger than expected");
 
        if cfg!(debug_assertions) {
 
            debug_assert_eq!(
 
                vec.len(), self.cur_size as usize,
 
                "trying to forget section, but size is larger than expected"
 
            );
 
            self.cur_size = self.start_size;
 
        }
 
        vec.truncate(self.start_size as usize);
 
    }
 

	
 
    #[inline]
 
    pub(crate) fn into_vec(self) -> Vec<T> {
 
    pub(crate) fn into_vec(mut self) -> Vec<T> {
 
        let vec = unsafe{&mut *self.inner};
 
        debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to turn section into vec, but size is larger than expected");
 
        if cfg!(debug_assertions) {
 
            debug_assert_eq!(
 
                vec.len(), self.cur_size as usize,
 
                "trying to turn section into vec, but size is larger than expected"
 
            );
 
            self.cur_size = self.start_size;
 
        }
 
        let section = Vec::from_iter(vec.drain(self.start_size as usize..));
 
        section
 
    }
 
}
 

	
 
impl<T: Sized> std::ops::Index<usize> for ScopedSection<T> {
 
    type Output = T;
 

	
 
    fn index(&self, idx: usize) -> &Self::Output {
 
        let vec = unsafe{&*self.inner};
 
        return &vec[self.start_size as usize + idx]
 
    }
 
}
 

	
 
#[cfg(debug_assertions)]
 
impl<T: Sized> Drop for ScopedSection<T> {
 
    fn drop(&mut self) {
 
        // Make sure that the data was actually taken out of the scoped section
 
        let vec = unsafe{&*self.inner};
 
        debug_assert_eq!(vec.len(), self.start_size as usize);
 
        let mut vec = unsafe{&mut *self.inner};
 
        debug_assert_eq!(vec.len(), self.cur_size as usize);
 
        vec.truncate(self.start_size as usize);
 
    }
 
}
 
\ No newline at end of file
src/protocol/input_source.rs
Show inline comments
 
use std::fmt;
 
use std::sync::{RwLock, RwLockReadGuard};
 
use std::fmt::Write;
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub struct InputPosition {
 
    pub line: u32,
 
    pub offset: u32,
 
}
 

	
 
impl InputPosition {
 
    pub(crate) fn with_offset(&self, offset: u32) -> Self {
 
        InputPosition { line: self.line, offset: self.offset + offset }
 
    }
 
}
 

	
 
#[derive(Debug, Clone, Copy)]
 
pub struct InputSpan {
 
    pub begin: InputPosition,
 
    pub end: InputPosition,
 
}
 

	
 
impl InputSpan {
 
    #[inline]
 
    pub fn from_positions(begin: InputPosition, end: InputPosition) -> Self {
 
        Self { begin, end }
 
    }
 
}
 

	
 
/// Wrapper around source file with optional filename. Ensures that the file is
 
/// only scanned once.
 
pub struct InputSource {
 
    pub(crate) filename: String,
 
    pub(crate) input: Vec<u8>,
 
    // Iteration
 
    line: u32,
 
    offset: usize,
 
    // State tracking
 
    pub(crate) had_error: Option<ParseError>,
 
    // The offset_lookup is built on-demand upon attempting to report an error.
 
    // Only one procedure will actually create the lookup, afterwards only read
 
    // locks will be held.
 
    offset_lookup: RwLock<Vec<u32>>,
 
}
 

	
 
impl InputSource {
 
    pub fn new(filename: String, input: Vec<u8>) -> Self {
 
        Self{
 
            filename,
 
            input,
 
            line: 1,
 
            offset: 0,
 
            had_error: None,
 
            offset_lookup: RwLock::new(Vec::new()),
 
        }
 
    }
 

	
 
    #[cfg(test)]
 
    pub fn new_test(input: &str) -> Self {
 
        let bytes = Vec::from(input.as_bytes());
 
        return Self::new(String::from("test"), bytes)
 
    }
 

	
 
    #[inline]
 
    pub fn pos(&self) -> InputPosition {
 
        InputPosition { line: self.line, offset: self.offset as u32 }
 
    }
 

	
 
    pub fn next(&self) -> Option<u8> {
 
        if self.offset < self.input.len() {
 
            Some(self.input[self.offset])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    pub fn lookahead(&self, offset: usize) -> Option<u8> {
 
        let offset_pos = self.offset + offset;
 
        if offset_pos < self.input.len() {
 
            Some(self.input[offset_pos])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    #[inline]
 
    pub fn section_at_pos(&self, start: InputPosition, end: InputPosition) -> &[u8] {
 
        &self.input[start.offset as usize..end.offset as usize]
 
    }
 

	
 
    #[inline]
 
    pub fn section_at_span(&self, span: InputSpan) -> &[u8] {
 
        &self.input[span.begin.offset as usize..span.end.offset as usize]
 
    }
 

	
 
    // Consumes the next character. Will check well-formedness of newlines: \r
 
    // must be followed by a \n, because this is used for error reporting. Will
 
    // not check for ascii-ness of the file, better left to a tokenizer.
 
    pub fn consume(&mut self) {
 
        match self.next() {
 
            Some(b'\r') => {
 
                if Some(b'\n') == self.lookahead(1) {
 
                    // Well formed file
 
                    self.offset += 1;
 
                } else {
 
                    // Not a well-formed file, pretend like we can continue
 
                    self.offset += 1;
 
                    self.set_error("Encountered carriage-feed without a following newline");
 
                }
 
            },
 
            Some(b'\n') => {
 
                self.line += 1;
 
                self.offset += 1;
 
            },
 
            Some(_) => {
 
                self.offset += 1;
 
            }
 
            None => {}
 
        }
 

	
 
        // Maybe we actually want to check this in release mode. Then again:
 
        // a 4 gigabyte source file... Really?
 
        debug_assert!(self.offset < u32::max_value() as usize);
 
    }
 

	
 
    fn set_error(&mut self, msg: &str) {
 
        if self.had_error.is_none() {
 
            self.had_error = Some(ParseError::new_error_str_at_pos(self, self.pos(), msg));
 
        }
 
    }
 

	
 
    fn get_lookup(&self) -> RwLockReadGuard<Vec<u32>> {
 
        // Once constructed the lookup always contains one element. We use this
 
        // to see if it is constructed already.
 
        {
 
            let lookup = self.offset_lookup.read().unwrap();
 
            if !lookup.is_empty() {
 
                return lookup;
 
            }
 
        }
 

	
 
        // Lookup was not constructed yet
 
        let mut lookup = self.offset_lookup.write().unwrap();
 
        if !lookup.is_empty() {
 
            // Somebody created it before we had the chance
 
            drop(lookup);
 
            let lookup = self.offset_lookup.read().unwrap();
 
            return lookup;
 
        }
 

	
 
        // Build the line number (!) to offset lookup, so offset by 1. We 
 
        // assume the entire source file is scanned (most common case) for
 
        // preallocation.
 
        lookup.reserve(self.line as usize + 2);
 
        lookup.push(0); // line 0: never used
 
        lookup.push(0); // first line: first character
 

	
 
        for char_idx in 0..self.input.len() {
 
            if self.input[char_idx] == b'\n' {
 
                lookup.push(char_idx as u32 + 1);
 
            }
 
        }
 

	
 
        lookup.push(self.input.len() as u32); // for lookup_line_end
 
        debug_assert_eq!(self.line as usize + 2, lookup.len(), "remove me: i am a testing assert and sometimes invalid");
 

	
 
        // Return created lookup
 
        drop(lookup);
 
        let lookup = self.offset_lookup.read().unwrap();
 
        return lookup;
 
    }
 

	
 
    /// Retrieves offset at which line starts (right after newline)
 
    fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        lookup[line_number as usize]
 
    }
 

	
 
    /// Retrieves offset at which line ends (at the newline character or the
 
    /// preceding carriage feed for \r\n-encoded newlines)
 
    fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        let offset = lookup[(line_number + 1) as usize] - 1;
 
        let offset_usize = offset as usize;
 

	
 
        // Compensate for newlines and a potential carriage feed
 
        if self.input[offset_usize] == b'\n' {
 
            if offset_usize > 0 && self.input[offset_usize] == b'\r' {
 
                offset - 2
 
            } else {
 
                offset - 1
 
            }
 
        } else {
 
            offset
 
        }
 
    }
 
}
 

	
 
#[derive(Debug)]
 
pub enum StatementKind {
 
    Info,
 
    Error
 
}
 

	
 
#[derive(Debug)]
 
pub enum ContextKind {
 
    SingleLine,
 
    MultiLine,
 
}
 

	
 
#[derive(Debug)]
 
pub struct ParseErrorStatement {
 
    pub(crate) statement_kind: StatementKind,
 
    pub(crate) context_kind: ContextKind,
 
    pub(crate) start_line: u32,
 
    pub(crate) start_column: u32,
 
    pub(crate) end_line: u32,
 
    pub(crate) end_column: u32,
 
    pub(crate) filename: String,
 
    pub(crate) context: String,
 
    pub(crate) message: String,
 
}
 

	
 
impl ParseErrorStatement {
 
    fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self {
 
        // Seek line start and end
 
        let line_start = source.lookup_line_start_offset(position.line);
 
        let line_end = source.lookup_line_end_offset(position.line);
 
        let context = Self::create_context(source, line_start as usize, line_end as usize);
 
        debug_assert!(position.offset >= line_start);
 
        let column = position.offset - line_start + 1;
 

	
 
        Self{
 
            statement_kind,
 
            context_kind: ContextKind::SingleLine,
 
            start_line: position.line,
 
            start_column: column,
 
            end_line: position.line,
 
            end_column: column + 1,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    fn from_source_at_span(statement_kind: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self {
 
        debug_assert!(span.end.line >= span.begin.line);
 
        debug_assert!(span.end.offset >= span.begin.offset);
 

	
 
        let first_line_start = source.lookup_line_start_offset(span.begin.line);
 
        let last_line_start = source.lookup_line_start_offset(span.end.line);
 
        let last_line_end = source.lookup_line_end_offset(span.end.line);
 
        let context = Self::create_context(source, first_line_start as usize, last_line_end as usize);
 
        debug_assert!(span.begin.offset >= first_line_start);
 
        let start_column = span.begin.offset - first_line_start + 1;
 
        let end_column = span.end.offset - last_line_start + 1;
 

	
 
        let context_kind = if span.begin.line == span.end.line {
 
            ContextKind::SingleLine
 
        } else {
 
            ContextKind::MultiLine
 
        };
 

	
 
        Self{
 
            statement_kind,
 
            context_kind,
 
            start_line: first_line_start,
 
            start_line: span.begin.line,
 
            start_column,
 
            end_line: last_line_start,
 
            end_line: span.end.line,
 
            end_column,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    /// Produces context from source
 
    fn create_context(source: &InputSource, start: usize, end: usize) -> String {
 
        let context_raw = &source.input[start..end];
 
        String::from_utf8_lossy(context_raw).to_string()
 
    }
 
}
 

	
 
impl fmt::Display for ParseErrorStatement {
 
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
        // Write kind of statement and message
 
        match self.statement_kind {
 
            StatementKind::Info => f.write_str(" INFO: ")?,
 
            StatementKind::Error => f.write_str("ERROR: ")?,
 
        }
 
        f.write_str(&self.message)?;
 
        f.write_char('\n')?;
 

	
 
        // Write originating file/line/column
 
        f.write_str(" +- ")?;
 
        if !self.filename.is_empty() {
 
            write!(f, "in {} ", self.filename)?;
 
        }
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => writeln!(f, " at {}:{}", self.start_line, self.start_column),
 
            ContextKind::MultiLine => writeln!(
 
                f, " from {}:{} to {}:{}",
 
                self.start_line, self.start_column, self.end_line, self.end_column
 
            )
 
        }?;
 

	
 
        // Helper function for writing context: converting tabs into 4 spaces
 
        // (oh, the controversy!) and creating an annotated line
 
        fn transform_context(source: &str, target: &mut String) {
 
            for char in source.chars() {
 
                if char == '\t' {
 
                    target.push_str("    ");
 
                } else {
 
                    target.push(char);
 
                }
 
            }
 
        }
 

	
 
        fn extend_annotation(first_col: u32, last_col: u32, source: &str, target: &mut String, extend_char: char) {
 
            debug_assert!(first_col > 0 && last_col > first_col);
 
            for (char_idx, char) in source.chars().enumerate().skip(first_col as usize - 1) {
 
                if char_idx == last_col as usize {
 
            let first_idx = first_col as usize - 1;
 
            let last_idx = last_col as usize - 1;
 
            for (char_idx, char) in source.chars().enumerate().skip(first_idx) {
 
                if char_idx == last_idx as usize {
 
                    break;
 
                }
 

	
 
                if char == '\t' {
 
                    for _ in 0..4 { target.push(extend_char); }
 
                } else {
 
                    target.push(extend_char);
 
                }
 
            }
 
        }
 

	
 
        // Write source context
 
        writeln!(f, " | ")?;
 

	
 
        let mut context = String::with_capacity(128);
 
        let mut annotation = String::with_capacity(128);
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => {
 
                // Write single line of context with indicator for the offending
 
                // span underneath.
 
                context.push_str(" |  ");
 
                transform_context(&self.context, &mut context);
 
                context.push('\n');
 
                f.write_str(&context)?;
 

	
 
                annotation.push_str(" | ");
 
                extend_annotation(1, self.start_column, &self.context, &mut annotation, ' ');
 
                extend_annotation(1, self.start_column + 1, &self.context, &mut annotation, ' ');
 
                extend_annotation(self.start_column, self.end_column, &self.context, &mut annotation, '~');
 
                annotation.push('\n');
 

	
 
                f.write_str(&annotation)?;
 
            },
 
            ContextKind::MultiLine => {
 
                // Annotate all offending lines
 
                // - first line
 
                let mut lines = self.context.lines();
 
                let first_line = lines.next().unwrap();
 
                transform_context(first_line, &mut context);
 
                writeln!(f, " |- {}", &context)?;
 

	
 
                // - remaining lines
 
                let mut last_line = first_line;
 
                while let Some(cur_line) = lines.next() {
 
                    context.clear();
 
                    transform_context(cur_line, &mut context);
 
                    writeln!(f, " |  {}", &context)?;
 
                    last_line = cur_line;
 
                }
 

	
 
                // - underline beneath last line
 
                annotation.push_str(" \\__");
 
                extend_annotation(1, self.end_column, &last_line, &mut annotation, '_');
 
                annotation.push_str("/\n");
 
                f.write_str(&annotation)?;
 
            }
 
        }
 

	
 
        Ok(())
 
    }
 
}
 

	
 
#[derive(Debug)]
 
pub struct ParseError {
 
    pub(crate) statements: Vec<ParseErrorStatement>
 
}
 

	
 
impl fmt::Display for ParseError {
 
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
        if self.statements.is_empty() {
 
            return Ok(())
 
        }
 

	
 
        self.statements[0].fmt(f)?;
 
        for statement in self.statements.iter().skip(1) {
 
            writeln!(f)?;
 
            statement.fmt(f)?;
 
        }
 

	
 
        Ok(())
 
    }
 
}
 

	
 
impl ParseError {
 
    pub fn new_error_at_pos(source: &InputSource, position: InputPosition, message: String) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
 
            StatementKind::Error, source, position, message
 
        )) }
 
    }
 

	
 
    pub fn new_error_str_at_pos(source: &InputSource, position: InputPosition, message: &str) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_pos(
 
            StatementKind::Error, source, position, message.to_string()
 
        )) }
 
    }
 

	
 
    pub fn new_error_at_span(source: &InputSource, span: InputSpan, message: String) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
 
            StatementKind::Error, source, span, message
 
        )) }
 
    }
 

	
 
    pub fn new_error_str_at_span(source: &InputSource, span: InputSpan, message: &str) -> Self {
 
        Self{ statements: vec!(ParseErrorStatement::from_source_at_span(
 
            StatementKind::Error, source, span, message.to_string()
 
        )) }
 
    }
 

	
 
    pub fn with_at_pos(mut self, error_type: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self {
 
        self.statements.push(ParseErrorStatement::from_source_at_pos(error_type, source, position, message));
 
        self
 
    }
 

	
 
    pub fn with_at_span(mut self, error_type: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self {
 
        self.statements.push(ParseErrorStatement::from_source_at_span(error_type, source, span, message.to_string()));
 
        self
 
    }
 

	
 
    pub fn with_info_at_pos(self, source: &InputSource, position: InputPosition, msg: String) -> Self {
 
        self.with_at_pos(StatementKind::Info, source, position, msg)
 
    }
 

	
 
    pub fn with_info_str_at_pos(self, source: &InputSource, position: InputPosition, msg: &str) -> Self {
 
        self.with_at_pos(StatementKind::Info, source, position, msg.to_string())
 
    }
 

	
 
    pub fn with_info_at_span(self, source: &InputSource, span: InputSpan, msg: String) -> Self {
 
        self.with_at_span(StatementKind::Info, source, span, msg)
 
    }
 

	
 
    pub fn with_info_str_at_span(self, source: &InputSource, span: InputSpan, msg: &str) -> Self {
 
        self.with_at_span(StatementKind::Info, source, span, msg.to_string())
 
    }
 
}
src/protocol/parser/mod.rs
Show inline comments
 
mod depth_visitor;
 
pub(crate) mod symbol_table;
 
pub(crate) mod type_table;
 
pub(crate) mod tokens;
 
pub(crate) mod token_parsing;
 
pub(crate) mod pass_tokenizer;
 
pub(crate) mod pass_symbols;
 
pub(crate) mod pass_imports;
 
pub(crate) mod pass_definitions;
 
pub(crate) mod pass_validation_linking;
 
pub(crate) mod pass_typing;
 
mod visitor;
 

	
 
use depth_visitor::*;
 
use tokens::*;
 
use crate::collections::*;
 
use symbol_table::SymbolTable;
 
use visitor::Visitor2;
 
use pass_tokenizer::PassTokenizer;
 
use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use type_table::TypeTable;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_printer::ASTWriter;
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Source,                 // only source is set
 
    Tokenized,              // source is tokenized
 
    SymbolsScanned,         // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    DefinitionsParsed,      // produced the AST for the entire module
 
    TypesAddedToTable,      // added all definitions to the type table
 
    ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
 
    Typed,                  // Type inference and checking has been performed
 
}
 

	
 
pub struct Module {
 
    // Buffers
 
    pub source: InputSource,
 
    pub tokens: TokenBuffer,
 
    // Identifiers
 
    pub root_id: RootId,
 
    pub name: Option<(PragmaId, StringRef<'static>)>,
 
    pub version: Option<(PragmaId, i64)>,
 
    pub phase: ModuleCompilationPhase,
 
}
 

	
 
pub struct PassCtx<'a> {
 
    heap: &'a mut Heap,
 
    symbols: &'a mut SymbolTable,
 
    pool: &'a mut StringPool,
 
}
 

	
 
pub struct Parser {
 
    pub(crate) heap: Heap,
 
    pub(crate) string_pool: StringPool,
 
    pub(crate) modules: Vec<Module>,
 
    pub(crate) symbol_table: SymbolTable,
 
    pub(crate) type_table: TypeTable,
 
    // Compiler passes
 
    pass_tokenizer: PassTokenizer,
 
    pass_symbols: PassSymbols,
 
    pass_import: PassImport,
 
    pass_definitions: PassDefinitions,
 
    pass_validation: PassValidationLinking,
 
    pass_typing: PassTyping,
 
}
 

	
 
impl Parser {
 
    pub fn new() -> Self {
 
        Parser{
 
            heap: Heap::new(),
 
            string_pool: StringPool::new(),
 
            modules: Vec::new(),
 
            symbol_table: SymbolTable::new(),
 
            type_table: TypeTable::new(),
 
            pass_tokenizer: PassTokenizer::new(),
 
            pass_symbols: PassSymbols::new(),
 
            pass_import: PassImport::new(),
 
            pass_definitions: PassDefinitions::new(),
 
            pass_validation: PassValidationLinking::new(),
 
            pass_typing: PassTyping::new(),
 
        }
 
    }
 

	
 
    pub fn feed(&mut self, mut source: InputSource) -> Result<(), ParseError> {
 
        // TODO: @Optimize
 
        let mut token_buffer = TokenBuffer::new();
 
        self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?;
 

	
 
        let module = Module{
 
            source,
 
            tokens: token_buffer,
 
            root_id: RootId::new_invalid(),
 
            name: None,
 
            version: None,
 
            phase: ModuleCompilationPhase::Tokenized,
 
        };
 
        self.modules.push(module);
 

	
 
        Ok(())
 
    }
 

	
 
    pub fn parse(&mut self) -> Result<(), ParseError> {
 
        let mut pass_ctx = PassCtx{
 
            heap: &mut self.heap,
 
            symbols: &mut self.symbol_table,
 
            pool: &mut self.string_pool,
 
        };
 

	
 
        // Advance all modules to the phase where all symbols are scanned
 
        for module_idx in 0..self.modules.len() {
 
            self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
        }
 

	
 
        // With all symbols scanned, perform further compilation until we can
 
        // add all base types to the type table.
 
        for module_idx in 0..self.modules.len() {
 
            self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
            self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
 
        }
 

	
 
        // Add every known type to the type table
 
        self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?;
 

	
 
        // Continue compilation with the remaining phases now that the types
 
        // are all in the type table
 
        for module_idx in 0..self.modules.len() {
 
            // TODO: Remove the entire Visitor abstraction. It really doesn't
 
            //  make sense considering the amount of special handling we do
 
            //  in each pass.
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                module: &self.modules[module_idx],
 
                module: &mut self.modules[module_idx],
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
            };
 
            self.pass_validation.visit_module(&mut ctx)?;
 
        }
 

	
 
        // Perform typechecking on all modules
 
        let mut queue = ResolveQueue::new();
 
        for module in &self.modules {
 
        for module in &mut self.modules {
 
            let ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                module,
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
            };
 
            PassTyping::queue_module_definitions(&ctx, &mut queue);
 
        };
 
        while !queue.is_empty() {
 
            let top = queue.pop().unwrap();
 
            let mut ctx = visitor::Ctx{
 
                heap: &mut self.heap,
 
                module: &self.modules[top.root_id.index as usize],
 
                module: &mut self.modules[top.root_id.index as usize],
 
                symbols: &mut self.symbol_table,
 
                types: &mut self.type_table,
 
            };
 
            self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?;
 
        }
 

	
 
        // Perform remaining steps
 
        // TODO: Phase out at some point
 
        for module in &self.modules {
 
            let root_id = module.root_id;
 
            if let Err((position, message)) = Self::parse_inner(&mut self.heap, root_id) {
 
                return Err(ParseError::new_error_str_at_pos(&self.modules[0].source, position, &message))
 
            }
 
        }
 

	
 
        // let mut writer = ASTWriter::new();
 
        // let mut file = std::fs::File::create(std::path::Path::new("ast.txt")).unwrap();
 
        // writer.write_ast(&mut file, &self.heap);
 

	
 
        Ok(())
 
    }
 

	
 
    pub fn parse_inner(h: &mut Heap, pd: RootId) -> VisitorResult {
 
        // TODO: @cleanup, slowly phasing out old compiler
 
        // NestedSynchronousStatements::new().visit_protocol_description(h, pd)?;
 
        // ChannelStatementOccurrences::new().visit_protocol_description(h, pd)?;
 
        // FunctionStatementReturns::new().visit_protocol_description(h, pd)?;
 
        // ComponentStatementReturnNew::new().visit_protocol_description(h, pd)?;
 
        // CheckBuiltinOccurrences::new().visit_protocol_description(h, pd)?;
 
        // BuildSymbolDeclarations::new().visit_protocol_description(h, pd)?;
 
        // LinkCallExpressions::new().visit_protocol_description(h, pd)?;
 
        // BuildScope::new().visit_protocol_description(h, pd)?;
 
        // ResolveVariables::new().visit_protocol_description(h, pd)?;
 
        LinkStatements::new().visit_protocol_description(h, pd)?;
 
        // BuildLabels::new().visit_protocol_description(h, pd)?;
 
        // ResolveLabels::new().visit_protocol_description(h, pd)?;
 
        AssignableExpressions::new().visit_protocol_description(h, pd)?;
 
        IndexableExpressions::new().visit_protocol_description(h, pd)?;
 
        SelectableExpressions::new().visit_protocol_description(h, pd)?;
 

	
 
        Ok(())
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/pass_definitions.rs
Show inline comments
 
use crate::protocol::ast::*;
 
use super::symbol_table::*;
 
use super::{Module, ModuleCompilationPhase, PassCtx};
 
use super::tokens::*;
 
use super::token_parsing::*;
 
use crate::protocol::input_source::{InputSource as InputSource, InputPosition as InputPosition, InputSpan, ParseError};
 
use crate::collections::*;
 

	
 
/// Parses all the tokenized definitions into actual AST nodes.
 
pub(crate) struct PassDefinitions {
 
    // State
 
    cur_definition: DefinitionId,
 
    // Temporary buffers of various kinds
 
    buffer: String,
 
    struct_fields: ScopedBuffer<StructFieldDefinition>,
 
    enum_variants: ScopedBuffer<EnumVariantDefinition>,
 
    union_variants: ScopedBuffer<UnionVariantDefinition>,
 
    parameters: ScopedBuffer<ParameterId>,
 
    expressions: ScopedBuffer<ExpressionId>,
 
    statements: ScopedBuffer<StatementId>,
 
    parser_types: Vec<ParserType>,
 
}
 

	
 
impl PassDefinitions {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            cur_definition: DefinitionId::new_invalid(),
 
            buffer: String::with_capacity(128),
 
            struct_fields: ScopedBuffer::new_reserved(128),
 
            enum_variants: ScopedBuffer::new_reserved(128),
 
            union_variants: ScopedBuffer::new_reserved(128),
 
            parameters: ScopedBuffer::new_reserved(128),
 
            expressions: ScopedBuffer::new_reserved(128),
 
            statements: ScopedBuffer::new_reserved(128),
 
            parser_types: Vec::with_capacity(128),
 
        }
 
    }
 

	
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 

	
 
        // Although we only need to parse the definitions, we want to go through
 
        // code ranges as well such that we can throw errors if we get
 
        // unexpected tokens at the module level of the source.
 
        let mut range_idx = module_range.first_child_idx;
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 

	
 
            match cur_range.range_kind {
 
                TokenRangeKind::Module => unreachable!(), // should not be reachable
 
                TokenRangeKind::Pragma | TokenRangeKind::Import => continue, // already fully parsed
 
                TokenRangeKind::Definition | TokenRangeKind::Code => {}
 
            }
 

	
 
            self.visit_range(modules, module_idx, ctx, range_idx_usize)?;
 

	
 
            match cur_range.next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; },
 
            if cur_range.next_sibling_idx == NO_SIBLING {
 
                break;
 
            } else {
 
                range_idx = cur_range.next_sibling_idx;
 
            }
 
        }
 

	
 
        modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_range(
 
        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
 
    ) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let cur_range = &module.tokens.ranges[range_idx];
 
        debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code);
 

	
 
        // Detect which definition we're parsing
 
        let mut iter = module.tokens.iter_range(cur_range);
 
        loop {
 
            let next = iter.next();
 
            if next.is_none() {
 
                return Ok(())
 
            }
 

	
 
            // Token was not None, so peek_ident returns None if not an ident
 
            let ident = peek_ident(&module.source, &mut iter);
 
            match ident {
 
                Some(KW_STRUCT) => self.visit_struct_definition(module, &mut iter, ctx)?,
 
                Some(KW_ENUM) => self.visit_enum_definition(module, &mut iter, ctx)?,
 
                Some(KW_UNION) => self.visit_union_definition(module, &mut iter, ctx)?,
 
                Some(KW_FUNCTION) => self.visit_function_definition(module, &mut iter, ctx)?,
 
                Some(KW_PRIMITIVE) | Some(KW_COMPOSITE) => self.visit_component_definition(module, &mut iter, ctx)?,
 
                _ => return Err(ParseError::new_error_str_at_pos(
 
                    &module.source, iter.last_valid_pos(),
 
                    "unexpected symbol, expected some kind of type or procedure definition"
 
                    "unexpected symbol, expected a keyword marking the start of a definition"
 
                )),
 
            }
 
        }
 
    }
 

	
 
    fn visit_struct_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_STRUCT)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        // Parse struct definition
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        let mut fields_section = self.struct_fields.start_section();
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
 
            |source, iter, ctx| {
 
                let poly_vars = ctx.heap[definition_id].poly_vars(); // TODO: @Cleanup, this is really ugly. But rust...
 

	
 
                let start_pos = iter.last_valid_pos();
 
                let parser_type = consume_parser_type(
 
                    source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope,
 
                    definition_id, false, 0
 
                )?;
 
                let field = consume_ident_interned(source, iter, ctx)?;
 
                Ok(StructFieldDefinition{
 
                    span: InputSpan::from_positions(start_pos, field.span.end),
 
                    field, parser_type
 
                })
 
            },
 
            &mut fields_section, "a struct field", "a list of struct fields", None
 
        )?;
 

	
 
        // Transfer to preallocated definition
 
        let struct_def = ctx.heap[definition_id].as_struct_mut();
 
        struct_def.fields = fields_section.into_vec();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_enum_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_ENUM)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        // Parse enum definition
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        let mut enum_section = self.enum_variants.start_section();
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
 
            |source, iter, ctx| {
 
                let identifier = consume_ident_interned(source, iter, ctx)?;
 
                let value = if iter.next() == Some(TokenKind::Equal) {
 
                    iter.consume();
 
                    let (variant_number, _) = consume_integer_literal(source, iter, &mut self.buffer)?;
 
                    EnumVariantValue::Integer(variant_number as i64) // TODO: @int
 
                } else {
 
                    EnumVariantValue::None
 
                };
 
                Ok(EnumVariantDefinition{ identifier, value })
 
            },
 
            &mut enum_section, "an enum variant", "a list of enum variants", None
 
        )?;
 

	
 
        // Transfer to definition
 
        let enum_def = ctx.heap[definition_id].as_enum_mut();
 
        enum_def.variants = enum_section.into_vec();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_union_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_UNION)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        // Parse union definition
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        let mut variants_section = self.union_variants.start_section();
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, ctx,
 
            |source, iter, ctx| {
 
                let identifier = consume_ident_interned(source, iter, ctx)?;
 
                let mut close_pos = identifier.span.end;
 

	
 
                let has_embedded = maybe_consume_comma_separated(
 
                    TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx,
 
                    |source, iter, ctx| {
 
                        let poly_vars = ctx.heap[definition_id].poly_vars(); // TODO: @Cleanup, this is really ugly. But rust...
 
                        consume_parser_type(
 
                            source, iter, &ctx.symbols, &ctx.heap, poly_vars,
 
                            module_scope, definition_id, false, 0
 
                        )
 
                    },
 
                    &mut self.parser_types, "an embedded type", Some(&mut close_pos)
 
                )?;
 
                let value = if has_embedded {
 
                    UnionVariantValue::Embedded(self.parser_types.clone())
 
                } else {
 
                    UnionVariantValue::None
 
                };
 
                self.parser_types.clear();
 

	
 
                Ok(UnionVariantDefinition{
 
                    span: InputSpan::from_positions(identifier.span.begin, close_pos),
 
                    identifier,
 
                    value
 
                })
 
            },
 
            &mut variants_section, "a union variant", "a list of union variants", None
 
        )?;
 

	
 
        // Transfer to AST
 
        let union_def = ctx.heap[definition_id].as_union_mut();
 
        union_def.variants = variants_section.into_vec();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_function_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        // Parse function's argument list
 
        let mut parameter_section = self.parameters.start_section();
 
        consume_parameter_list(
 
            &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id
 
        )?;
 
        let parameters = parameter_section.into_vec();
 

	
 
        // Consume return types
 
        consume_token(&module.source, iter, TokenKind::ArrowRight)?;
 
        let mut open_curly_pos = iter.last_valid_pos();
 
        consume_comma_separated_until(
 
            TokenKind::OpenCurly, &module.source, iter, ctx,
 
            |source, iter, ctx| {
 
                let poly_vars = ctx.heap[definition_id].poly_vars(); // TODO: @Cleanup, this is really ugly. But rust...
 
                consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false, 0)
 
            },
 
            &mut self.parser_types, "a return type", Some(&mut open_curly_pos)
 
        )?;
 
        let return_types = self.parser_types.clone();
 

	
 
        // TODO: @ReturnValues
 
        match return_types.len() {
 
            0 => return Err(ParseError::new_error_str_at_pos(&module.source, open_curly_pos, "expected a return type")),
 
            1 => {},
 
            _ => return Err(ParseError::new_error_str_at_pos(&module.source, open_curly_pos, "multiple return types are not (yet) allowed")),
 
        }
 

	
 
        // Consume block
 
        let body = self.consume_block_statement_without_leading_curly(module, iter, ctx, open_curly_pos)?;
 

	
 
        // Assign everything in the preallocated AST node
 
        let function = ctx.heap[definition_id].as_function_mut();
 
        function.return_types = return_types;
 
        function.parameters = parameters;
 
        function.body = body;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_component_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        let (_variant_text, _) = consume_any_ident(&module.source, iter)?;
 
        debug_assert!(_variant_text == KW_PRIMITIVE || _variant_text == KW_COMPOSITE);
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated definition
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        // Parse component's argument list
 
        let mut parameter_section = self.parameters.start_section();
 
        consume_parameter_list(
 
            &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id
 
        )?;
 
        let parameters = parameter_section.into_vec();
 

	
 
        // Consume block
 
        let body = self.consume_block_statement(module, iter, ctx)?;
 

	
 
        // Assign everything in the AST node
 
        let component = ctx.heap[definition_id].as_component_mut();
 
        component.parameters = parameters;
 
        component.body = body;
 

	
 
        Ok(())
 
    }
 

	
 
    /// Consumes a block statement. If the resulting statement is not a block
 
    /// (e.g. for a shorthand "if (expr) single_statement") then it will be
 
    /// wrapped in one
 
    fn consume_block_or_wrapped_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BlockStatementId, ParseError> {
 
        if Some(TokenKind::OpenCurly) == iter.next() {
 
            // This is a block statement
 
            self.consume_block_statement(module, iter, ctx)
 
        } else {
 
            // Not a block statement, so wrap it in one
 
            let mut statements = self.statements.start_section();
 
            let wrap_begin_pos = iter.last_valid_pos();
 
            self.consume_statement(module, iter, ctx, &mut statements)?;
 
            let wrap_end_pos = iter.last_valid_pos();
 

	
 
            debug_assert_eq!(statements.len(), 1);
 
            let statements = statements.into_vec();
 

	
 
            Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{
 
                this,
 
                is_implicit: true,
 
                span: InputSpan::from_positions(wrap_begin_pos, wrap_end_pos), // TODO: @Span
 
                statements,
 
                parent_scope: None,
 
                relative_pos_in_parent: 0,
 
                locals: Vec::new(),
 
                labels: Vec::new()
 
            }))
 
        }
 
    }
 

	
 
    /// Consumes a statement and returns a boolean indicating whether it was a
 
    /// block or not.
 
    fn consume_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, section: &mut ScopedSection<StatementId>
 
    ) -> Result<(), ParseError> {
 
        let next = iter.next().expect("consume_statement has a next token");
 

	
 
        if next == TokenKind::OpenCurly {
 
            let id = self.consume_block_statement(module, iter, ctx)?;
 
            section.push(id.upcast());
 
        } else if next == TokenKind::Ident {
 
            let (ident, _) = consume_any_ident(&module.source, iter)?;
 
            let ident = peek_ident(&module.source, iter).unwrap();
 
            if ident == KW_STMT_IF {
 
                // Consume if statement and place end-if statement directly
 
                // after it.
 
                let id = self.consume_if_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 

	
 
                let end_if = ctx.heap.alloc_end_if_statement(|this| EndIfStatement{
 
                    this, start_if: id, next: None
 
                });
 
                section.push(id.upcast());
 

	
 
                let if_stmt = &mut ctx.heap[id];
 
                if_stmt.end_if = Some(end_if);
 
            } else if ident == KW_STMT_WHILE {
 
                let id = self.consume_while_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 

	
 
                let end_while = ctx.heap.alloc_end_while_statement(|this| EndWhileStatement{
 
                    this, start_while: id, next: None
 
                });
 
                section.push(id.upcast());
 

	
 
                let while_stmt = &mut ctx.heap[id];
 
                while_stmt.end_while = Some(end_while);
 
            } else if ident == KW_STMT_BREAK {
 
                let id = self.consume_break_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_CONTINUE {
 
                let id = self.consume_continue_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_SYNC {
 
                let id = self.consume_synchronous_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 

	
 
                let end_sync = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement{
 
                    this, start_sync: id, next: None
 
                });
 

	
 
                let sync_stmt = &mut ctx.heap[id];
 
                sync_stmt.end_sync = Some(end_sync);
 
            } else if ident == KW_STMT_RETURN {
 
                let id = self.consume_return_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_GOTO {
 
                let id = self.consume_goto_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_NEW {
 
                let id = self.consume_new_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_CHANNEL {
 
                let id = self.consume_channel_statement(module, iter, ctx)?;
 
                section.push(id.upcast().upcast());
 
            } else if iter.peek() == Some(TokenKind::Colon) {
 
                self.consume_labeled_statement(module, iter, ctx, section)?;
 
            } else {
 
                // Two fallback possibilities: the first one is a memory
 
                // declaration, the other one is to parse it as a regular
 
                // expression. This is a bit ugly
 
                if let Some((memory_stmt_id, assignment_stmt_id)) = self.maybe_consume_memory_statement(module, iter, ctx)? {
 
                    section.push(memory_stmt_id.upcast().upcast());
 
                    section.push(assignment_stmt_id.upcast());
 
                } else {
 
                    let id = self.consume_expression_statement(module, iter, ctx)?;
 
                    section.push(id.upcast());
 
                }
 
            }
 
        };
 

	
 
        return Ok(());
 
    }
 

	
 
    fn consume_block_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let open_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?;
 
        self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin)
 
    }
 

	
 
    fn consume_block_statement_without_leading_curly(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, open_curly_pos: InputPosition
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let mut stmt_section = self.statements.start_section();
 
        let mut next = iter.next();
 
        while next != Some(TokenKind::CloseCurly) {
 
            if next.is_none() {
 
                return Err(ParseError::new_error_str_at_pos(
 
                    &module.source, iter.last_valid_pos(), "expected a statement or '}'"
 
                ));
 
            }
 
            self.consume_statement(module, iter, ctx, &mut stmt_section)?;
 
            next = iter.next();
 
        }
 

	
 
        let statements = stmt_section.into_vec();
 
        let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?;
 
        block_span.begin = open_curly_pos;
 

	
 
        Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{
 
            this,
 
            is_implicit: false,
 
            span: block_span,
 
            statements,
 
            parent_scope: None,
 
            relative_pos_in_parent: 0,
 
            locals: Vec::new(),
 
            labels: Vec::new(),
 
        }))
 
    }
 

	
 
    fn consume_if_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<IfStatementId, ParseError> {
 
        let if_span = consume_exact_ident(&module.source, iter, KW_STMT_IF)?;
 
        consume_token(&module.source, iter, TokenKind::OpenParen)?;
 
        let test = self.consume_expression(module, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::CloseParen)?;
 
        let true_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?;
 

	
 
        let false_body = if has_ident(&module.source, iter, KW_STMT_ELSE) {
 
            iter.consume();
 
            let false_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?;
 
            Some(false_body)
 
        } else {
 
            None
 
        };
 

	
 
        Ok(ctx.heap.alloc_if_statement(|this| IfStatement{
 
            this,
 
            span: if_span,
 
            test,
 
            true_body,
 
            false_body,
 
            end_if: None,
 
        }))
 
    }
 

	
 
    fn consume_while_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<WhileStatementId, ParseError> {
 
        let while_span = consume_exact_ident(&module.source, iter, KW_STMT_WHILE)?;
 
        consume_token(&module.source, iter, TokenKind::OpenParen)?;
 
        let test = self.consume_expression(module, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::CloseParen)?;
 
        let body = self.consume_block_or_wrapped_statement(module, iter, ctx)?;
 

	
 
        Ok(ctx.heap.alloc_while_statement(|this| WhileStatement{
 
            this,
 
            span: while_span,
 
            test,
 
            body,
 
            end_while: None,
 
            in_sync: None,
 
        }))
 
    }
 

	
 
    fn consume_break_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BreakStatementId, ParseError> {
 
        let break_span = consume_exact_ident(&module.source, iter, KW_STMT_BREAK)?;
 
        let label = if Some(TokenKind::Ident) == iter.next() {
 
            let label = consume_ident_interned(&module.source, iter, ctx)?;
 
            Some(label)
 
        } else {
 
            None
 
        };
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 
        Ok(ctx.heap.alloc_break_statement(|this| BreakStatement{
 
            this,
 
            span: break_span,
 
            label,
 
            target: None,
 
        }))
 
    }
 

	
 
    fn consume_continue_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ContinueStatementId, ParseError> {
 
        let continue_span = consume_exact_ident(&module.source, iter, KW_STMT_CONTINUE)?;
 
        let label=  if Some(TokenKind::Ident) == iter.next() {
 
            let label = consume_ident_interned(&module.source, iter, ctx)?;
 
            Some(label)
 
        } else {
 
            None
 
        };
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 
        Ok(ctx.heap.alloc_continue_statement(|this| ContinueStatement{
 
            this,
 
            span: continue_span,
 
            label,
 
            target: None
 
        }))
 
    }
 

	
 
    fn consume_synchronous_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<SynchronousStatementId, ParseError> {
 
        let synchronous_span = consume_exact_ident(&module.source, iter, KW_STMT_SYNC)?;
 
        let body = self.consume_block_or_wrapped_statement(module, iter, ctx)?;
 

	
 
        Ok(ctx.heap.alloc_synchronous_statement(|this| SynchronousStatement{
 
            this,
 
            span: synchronous_span,
 
            body,
 
            end_sync: None,
 
            parent_scope: None,
 
        }))
 
    }
 

	
 
    fn consume_return_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ReturnStatementId, ParseError> {
 
        let return_span = consume_exact_ident(&module.source, iter, KW_STMT_RETURN)?;
 
        let mut scoped_section = self.expressions.start_section();
 

	
 
        consume_comma_separated_until(
 
            TokenKind::SemiColon, &module.source, iter, ctx,
 
            |_source, iter, ctx| self.consume_expression(module, iter, ctx),
 
            &mut scoped_section, "a return expression", None
 
        )?;
 
        let expressions = scoped_section.into_vec();
 

	
 
        if expressions.is_empty() {
 
            return Err(ParseError::new_error_str_at_span(&module.source, return_span, "expected at least one return value"));
 
        } else if expressions.len() > 1 {
 
            return Err(ParseError::new_error_str_at_span(&module.source, return_span, "multiple return values are not (yet) supported"))
 
        }
 

	
 
        Ok(ctx.heap.alloc_return_statement(|this| ReturnStatement{
 
            this,
 
            span: return_span,
 
            expressions
 
        }))
 
    }
 

	
 
    fn consume_goto_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<GotoStatementId, ParseError> {
 
        let goto_span = consume_exact_ident(&module.source, iter, KW_STMT_GOTO)?;
 
        let label = consume_ident_interned(&module.source, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 
        Ok(ctx.heap.alloc_goto_statement(|this| GotoStatement{
 
            this,
 
            span: goto_span,
 
            label,
 
            target: None
 
        }))
 
    }
 

	
 
    fn consume_new_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<NewStatementId, ParseError> {
 
        let new_span = consume_exact_ident(&module.source, iter, KW_STMT_NEW)?;
 

	
 
        // TODO: @Cleanup, should just call something like consume_component_expression-ish
 
        let start_pos = iter.last_valid_pos();
 
        let expression_id = self.consume_primary_expression(module, iter, ctx)?;
 
        let expression = &ctx.heap[expression_id];
 
        let mut valid = false;
 

	
 
        let mut call_id = CallExpressionId::new_invalid();
 
        if let Expression::Call(expression) = expression {
 
            // Allow both components and functions, as it makes more sense to
 
            // check their correct use in the validation and linking pass
 
            if expression.method == Method::UserComponent || expression.method == Method::UserFunction {
 
                call_id = expression.this;
 
                valid = true;
 
            }
 
        }
 

	
 
        if !valid {
 
            return Err(ParseError::new_error_str_at_span(
 
                &module.source, InputSpan::from_positions(start_pos, iter.last_valid_pos()), "expected a call expression"
 
            ));
 
        }
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 

	
 
        debug_assert!(!call_id.is_invalid());
 
        Ok(ctx.heap.alloc_new_statement(|this| NewStatement{
 
            this,
 
            span: new_span,
 
            expression: call_id,
 
            next: None
 
        }))
 
    }
 

	
 
    fn consume_channel_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ChannelStatementId, ParseError> {
 
        // Consume channel specification
 
        let channel_span = consume_exact_ident(&module.source, iter, KW_STMT_CHANNEL)?;
 
        let channel_type = if Some(TokenKind::OpenAngle) == iter.next() {
 
            // Retrieve the type of the channel, we're cheating a bit here by
 
            // consuming the first '<' and setting the initial angle depth to 1
 
            // such that our final '>' will be consumed as well.
 
            iter.consume();
 
            let definition_id = self.cur_definition;
 
            let poly_vars = ctx.heap[definition_id].poly_vars();
 
            consume_parser_type(
 
                &module.source, iter, &ctx.symbols, &ctx.heap,
 
                &poly_vars, SymbolScope::Module(module.root_id), definition_id,
 
                true, 1
 
            )?
 
        } else {
 
            // Assume inferred
 
            ParserType{ elements: vec![ParserTypeElement{
 
                full_span: channel_span, // TODO: @Span fix
 
                variant: ParserTypeVariant::Inferred
 
            }]}
 
        };
 

	
 
        let from_identifier = consume_ident_interned(&module.source, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::ArrowRight)?;
 
        let to_identifier = consume_ident_interned(&module.source, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 

	
 
        // Construct ports
 
        let from = ctx.heap.alloc_local(|this| Local{
 
            this,
 
            identifier: from_identifier,
 
            parser_type: channel_type.clone(),
 
            relative_pos_in_block: 0,
 
        });
 
        let to = ctx.heap.alloc_local(|this| Local{
 
            this,
 
            identifier: to_identifier,
 
            parser_type: channel_type,
 
            relative_pos_in_block: 0,
 
        });
 

	
 
        // Construct the channel
 
        Ok(ctx.heap.alloc_channel_statement(|this| ChannelStatement{
 
            this,
 
            span: channel_span,
 
            from, to,
 
            relative_pos_in_block: 0,
 
            next: None,
 
        }))
 
    }
 

	
 
    fn consume_labeled_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, section: &mut ScopedSection<StatementId>
 
    ) -> Result<(), ParseError> {
 
        let label = consume_ident_interned(&module.source, iter, ctx)?;
 
        consume_token(&module.source, iter, TokenKind::Colon)?;
 

	
 
        // Not pretty: consume_statement may produce more than one statement.
 
        // The values in the section need to be in the correct order if some
 
        // kind of outer block is consumed, so we take another section, push
 
        // the expressions in that one, and then allocate the labeled statement.
 
        let mut inner_section = self.statements.start_section();
 
        self.consume_statement(module, iter, ctx, &mut inner_section)?;
 
        debug_assert!(inner_section.len() >= 1);
 

	
 
        let stmt_id = ctx.heap.alloc_labeled_statement(|this| LabeledStatement {
 
            this,
 
            label,
 
            body: inner_section[0],
 
            relative_pos_in_block: 0,
 
            in_sync: None,
 
        });
 

	
 
        if inner_section.len() == 1 {
 
            // Produce the labeled statement pointing to the first statement.
 
            // This is by far the most common case.
 
            inner_section.forget();
 
            section.push(stmt_id.upcast());
 
        } else {
 
            // Produce the labeled statement using the first statement, and push
 
            // the remaining ones at the end.
 
            let inner_statements = inner_section.into_vec();
 
            section.push(stmt_id.upcast());
 
            for idx in 1..inner_statements.len() {
 
                section.push(inner_statements[idx])
 
            }
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn maybe_consume_memory_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<Option<(MemoryStatementId, ExpressionStatementId)>, ParseError> {
 
        // This is a bit ugly. It would be nicer if we could somehow
 
        // consume the expression with a type hint if we do get a valid
 
        // type, but we don't get an identifier following it
 
        let iter_state = iter.save();
 
        let definition_id = self.cur_definition;
 
        let poly_vars = ctx.heap[definition_id].poly_vars();
 

	
 
        let parser_type = consume_parser_type(
 
            &module.source, iter, &ctx.symbols, &ctx.heap, poly_vars,
 
            SymbolScope::Definition(definition_id), definition_id, true, 0
 
        );
 

	
 
        if let Ok(parser_type) = parser_type {
 
            if Some(TokenKind::Ident) == iter.next() {
 
                // Assume this is a proper memory statement
 
                let identifier = consume_ident_interned(&module.source, iter, ctx)?;
 
                let memory_span = InputSpan::from_positions(parser_type.elements[0].full_span.begin, identifier.span.end);
 
                let assign_span = consume_token(&module.source, iter, TokenKind::Equal)?;
 

	
 
                let initial_expr_begin_pos = iter.last_valid_pos();
 
                let initial_expr_id = self.consume_expression(module, iter, ctx)?;
 
                let initial_expr_end_pos = iter.last_valid_pos();
 
                consume_token(&module.source, iter, TokenKind::SemiColon)?;
 

	
 
                // Allocate the memory statement with the variable
 
                let local_id = ctx.heap.alloc_local(|this| Local{
 
                    this,
 
                    identifier: identifier.clone(),
 
                    parser_type,
 
                    relative_pos_in_block: 0,
 
                });
 
                let memory_stmt_id = ctx.heap.alloc_memory_statement(|this| MemoryStatement{
 
                    this,
 
                    span: memory_span,
 
                    variable: local_id,
 
                    next: None
 
                });
 

	
 
                // Allocate the initial assignment
 
                let variable_expr_id = ctx.heap.alloc_variable_expression(|this| VariableExpression{
 
                    this,
 
                    identifier,
 
                    declaration: None,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: Default::default()
 
                });
 
                let assignment_expr_id = ctx.heap.alloc_assignment_expression(|this| AssignmentExpression{
 
                    this,
 
                    span: assign_span,
 
                    left: variable_expr_id.upcast(),
 
                    operation: AssignmentOperator::Set,
 
                    right: initial_expr_id,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: Default::default(),
 
                });
 
                let assignment_stmt_id = ctx.heap.alloc_expression_statement(|this| ExpressionStatement{
 
                    this,
 
                    span: InputSpan::from_positions(initial_expr_begin_pos, initial_expr_end_pos),
 
                    expression: assignment_expr_id.upcast(),
 
                    next: None,
 
                });
 

	
 
                return Ok(Some((memory_stmt_id, assignment_stmt_id)))
 
            }
 
        }
 

	
 
        // If here then one of the preconditions for a memory statement was not
 
        // met. So recover the iterator and return
 
        iter.load(iter_state);
 
        Ok(None)
 
    }
 

	
 
    fn consume_expression_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionStatementId, ParseError> {
 
        let start_pos = iter.last_valid_pos();
 
        let expression = self.consume_expression(module, iter, ctx)?;
 
        let end_pos = iter.last_valid_pos();
 
        consume_token(&module.source, iter, TokenKind::SemiColon)?;
 

	
 
        Ok(ctx.heap.alloc_expression_statement(|this| ExpressionStatement{
 
            this,
 
            span: InputSpan::from_positions(start_pos, end_pos),
 
            expression,
 
            next: None,
 
        }))
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Expression Parsing
 
    //--------------------------------------------------------------------------
 

	
 
    fn consume_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_assignment_expression(module, iter, ctx)
 
    }
 

	
 
    fn consume_assignment_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        // Utility to convert token into assignment operator
 
        fn parse_assignment_operator(token: Option<TokenKind>) -> Option<AssignmentOperator> {
 
            use TokenKind as TK;
 
            use AssignmentOperator as AO;
 

	
 
            if token.is_none() {
 
                return None
 
            }
 

	
 
            match token.unwrap() {
 
                TK::Equal               => Some(AO::Set),
 
                TK::StarEquals          => Some(AO::Multiplied),
 
                TK::SlashEquals         => Some(AO::Divided),
 
                TK::PercentEquals       => Some(AO::Remained),
 
                TK::PlusEquals          => Some(AO::Added),
 
                TK::MinusEquals         => Some(AO::Subtracted),
 
                TK::ShiftLeftEquals     => Some(AO::ShiftedLeft),
 
                TK::ShiftRightEquals    => Some(AO::ShiftedRight),
 
                TK::AndEquals           => Some(AO::BitwiseAnded),
 
                TK::CaretEquals         => Some(AO::BitwiseXored),
 
                TK::OrEquals            => Some(AO::BitwiseOred),
 
                _                       => None
 
            }
 
        }
 

	
 
        let expr = self.consume_conditional_expression(module, iter, ctx)?;
 
        if let Some(operation) = parse_assignment_operator(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let left = expr;
 
            let right = self.consume_expression(module, iter, ctx)?;
 

	
 
            Ok(ctx.heap.alloc_assignment_expression(|this| AssignmentExpression{
 
                this, span, left, operation, right,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast())
 
        } else {
 
            Ok(expr)
 
        }
 
    }
 

	
 
    fn consume_conditional_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        let result = self.consume_concat_expression(module, iter, ctx)?;
 
        if let Some(TokenKind::Question) = iter.next() {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let test = result;
 
            let true_expression = self.consume_expression(module, iter, ctx)?;
 
            consume_token(&module.source, iter, TokenKind::Colon)?;
 
            let false_expression = self.consume_expression(module, iter, ctx)?;
 
            Ok(ctx.heap.alloc_conditional_expression(|this| ConditionalExpression{
 
                this, span, test, true_expression, false_expression,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast())
 
        } else {
 
            Ok(result)
 
        }
 
    }
 

	
 
    fn consume_concat_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::At) => Some(BinaryOperator::Concatenate),
 
                _ => None
 
            },
 
            Self::consume_logical_or_expression
 
        )
 
    }
 

	
 
    fn consume_logical_or_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::OrOr) => Some(BinaryOperator::LogicalOr),
 
                _ => None
 
            },
 
            Self::consume_logical_and_expression
 
        )
 
    }
 

	
 
    fn consume_logical_and_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::AndAnd) => Some(BinaryOperator::LogicalAnd),
 
                _ => None
 
            },
 
            Self::consume_bitwise_or_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_or_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Or) => Some(BinaryOperator::BitwiseOr),
 
                _ => None
 
            },
 
            Self::consume_bitwise_xor_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_xor_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Caret) => Some(BinaryOperator::BitwiseXor),
 
                _ => None
 
            },
 
            Self::consume_bitwise_and_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_and_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::And) => Some(BinaryOperator::BitwiseAnd),
 
                _ => None
 
            },
 
            Self::consume_equality_expression
 
        )
 
    }
 

	
 
    fn consume_equality_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::EqualEqual) => Some(BinaryOperator::Equality),
 
                Some(TokenKind::NotEqual) => Some(BinaryOperator::Inequality),
 
                _ => None
 
            },
 
            Self::consume_relational_expression
 
        )
 
    }
 

	
 
    fn consume_relational_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::OpenAngle) => Some(BinaryOperator::LessThan),
 
                Some(TokenKind::CloseAngle) => Some(BinaryOperator::GreaterThan),
 
                Some(TokenKind::LessEquals) => Some(BinaryOperator::LessThanEqual),
 
                Some(TokenKind::GreaterEquals) => Some(BinaryOperator::GreaterThanEqual),
 
                _ => None
 
            },
 
            Self::consume_shift_expression
 
        )
 
    }
 

	
 
    fn consume_shift_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::ShiftLeft) => Some(BinaryOperator::ShiftLeft),
 
                Some(TokenKind::ShiftRight) => Some(BinaryOperator::ShiftRight),
 
                _ => None
 
            },
 
            Self::consume_add_or_subtract_expression
 
        )
 
    }
 

	
 
    fn consume_add_or_subtract_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Plus) => Some(BinaryOperator::Add),
 
                Some(TokenKind::Minus) => Some(BinaryOperator::Subtract),
 
                _ => None,
 
            },
 
            Self::consume_multiply_divide_or_modulus_expression
 
        )
 
    }
 

	
 
    fn consume_multiply_divide_or_modulus_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Star) => Some(BinaryOperator::Multiply),
 
                Some(TokenKind::Slash) => Some(BinaryOperator::Divide),
 
                Some(TokenKind::Percent) => Some(BinaryOperator::Remainder),
 
                _ => None
 
            },
 
            Self::consume_prefix_expression
 
        )
 
    }
 

	
 
    fn consume_prefix_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        fn parse_prefix_token(token: Option<TokenKind>) -> Option<UnaryOperation> {
 
            use TokenKind as TK;
 
            use UnaryOperation as UO;
 
            match token {
 
                Some(TK::Plus) => Some(UO::Positive),
 
                Some(TK::Minus) => Some(UO::Negative),
 
                Some(TK::PlusPlus) => Some(UO::PreIncrement),
 
                Some(TK::MinusMinus) => Some(UO::PreDecrement),
 
                Some(TK::Tilde) => Some(UO::BitwiseNot),
 
                Some(TK::Exclamation) => Some(UO::LogicalNot),
 
                _ => None
 
            }
 
        }
 

	
 
        if let Some(operation) = parse_prefix_token(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let expression = self.consume_prefix_expression(module, iter, ctx)?;
 
            Ok(ctx.heap.alloc_unary_expression(|this| UnaryExpression {
 
                this, span, operation, expression,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default()
 
            }).upcast())
 
        } else {
 
            self.consume_postfix_expression(module, iter, ctx)
 
        }
 
    }
 

	
 
    fn consume_postfix_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        fn has_matching_postfix_token(token: Option<TokenKind>) -> bool {
 
            use TokenKind as TK;
 

	
 
            if token.is_none() { return false; }
 
            match token.unwrap() {
 
                TK::PlusPlus | TK::MinusMinus | TK::OpenSquare | TK::Dot => true,
 
                _ => false
 
            }
 
        }
 

	
 
        let mut result = self.consume_primary_expression(module, iter, ctx)?;
 
        let mut next = iter.next();
 
        while has_matching_postfix_token(next) {
 
            let token = next.unwrap();
 
            let mut span = iter.next_span();
 
            iter.consume();
 

	
 
            if token == TokenKind::PlusPlus {
 
                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
 
                    this, span,
 
                    operation: UnaryOperation::PostIncrement,
 
                    expression: result,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: ConcreteType::default()
 
                }).upcast();
 
            } else if token == TokenKind::MinusMinus {
 
                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
 
                    this, span,
 
                    operation: UnaryOperation::PostDecrement,
 
                    expression: result,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: ConcreteType::default()
 
                }).upcast();
 
            } else if token == TokenKind::OpenSquare {
src/protocol/parser/pass_imports.rs
Show inline comments
 
use crate::protocol::ast::*;
 
use super::symbol_table::*;
 
use super::{Module, ModuleCompilationPhase, PassCtx};
 
use super::tokens::*;
 
use super::token_parsing::*;
 
use crate::protocol::input_source::{InputSource as InputSource, InputSpan, ParseError};
 
use crate::collections::*;
 

	
 
/// Parses all the imports in the module tokens. Is applied after the
 
/// definitions and name of modules are resolved. Hence we should be able to
 
/// resolve all symbols to their appropriate module/definition.
 
pub(crate) struct PassImport {
 
    imports: Vec<ImportId>,
 
    found_symbols: Vec<(AliasedSymbol, SymbolDefinition)>,
 
    scoped_symbols: Vec<Symbol>,
 
}
 

	
 
impl PassImport {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            imports: Vec::with_capacity(32),
 
            found_symbols: Vec::with_capacity(32),
 
            scoped_symbols: Vec::with_capacity(32),
 
        }
 
    }
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::SymbolsScanned));
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::SymbolsScanned);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 

	
 
        let mut range_idx = module_range.first_child_idx;
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 

	
 
            if cur_range.range_kind == TokenRangeKind::Import {
 
                self.visit_import_range(modules, module_idx, ctx, range_idx_usize)?;
 
            }
 

	
 
            match cur_range.next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; }
 
            if cur_range.next_sibling_idx == NO_SIBLING {
 
                break;
 
            } else {
 
                range_idx = cur_range.next_sibling_idx;
 
            }
 
        }
 

	
 
        let root = &mut ctx.heap[module.root_id];
 
        root.imports.extend(self.imports.drain(..));
 

	
 
        let module = &mut modules[module_idx];
 
        module.phase = ModuleCompilationPhase::ImportsResolved;
 

	
 
        Ok(())
 
    }
 

	
 
    pub(crate) fn visit_import_range(
 
        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
 
    ) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let import_range = &module.tokens.ranges[range_idx];
 
        debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
 

	
 
        let mut iter = module.tokens.iter_range(import_range);
 

	
 
        // Consume "import"
 
        let (_import_ident, import_span) =
 
            consume_any_ident(&module.source, &mut iter)?;
 
        debug_assert_eq!(_import_ident, KW_IMPORT);
 

	
 
        // Consume module name
 
        let (module_name, module_name_span) = consume_domain_ident(&module.source, &mut iter)?;
 
        let target_root_id = ctx.symbols.get_module_by_name(module_name);
 
        if target_root_id.is_none() {
 
            return Err(ParseError::new_error_at_span(
 
                &module.source, module_name_span,
 
                format!("could not resolve module '{}'", String::from_utf8_lossy(module_name))
 
            ));
 
        }
 
        let module_name = ctx.pool.intern(module_name);
 
        let module_identifier = Identifier{ span: module_name_span, value: module_name };
 
        let target_root_id = target_root_id.unwrap();
 

	
 
        // Check for subsequent characters (alias, multiple imported symbols)
 
        let next = iter.next();
 
        let import_id;
 

	
 
        if has_ident(&module.source, &mut iter, b"as") {
 
            // Alias for module
 
            iter.consume();
 
            let alias_identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
 
            let alias_name = alias_identifier.value.clone();
 

	
 
            import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
 
                this,
 
                span: import_span,
 
                module: module_identifier,
 
                alias: alias_identifier,
 
                module_id: target_root_id
 
            }));
 
            ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
 
                name: alias_name,
 
                variant: SymbolVariant::Module(SymbolModule{
 
                    root_id: target_root_id,
 
                    introduced_at: import_id,
 
                }),
 
            });
 
        } else if Some(TokenKind::ColonColon) == next {
 
            iter.consume();
 

	
 
            // Helper function to consume symbols, their alias, and the
 
            // definition the symbol is pointing to.
 
            fn consume_symbol_and_maybe_alias<'a>(
 
                source: &'a InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
 
                module_name: &StringRef<'static>, module_root_id: RootId,
 
            ) -> Result<(AliasedSymbol, SymbolDefinition), ParseError> {
 
                // Consume symbol name and make sure it points to an existing definition
 
                let symbol_identifier = consume_ident_interned(source, iter, ctx)?;
 

	
 
                // Consume alias text if specified
 
                let alias_identifier = if peek_ident(source, iter) == Some(b"as") {
 
                    // Consume alias
 
                    iter.consume();
 
                    Some(consume_ident_interned(source, iter, ctx)?)
 
                } else {
 
                    None
 
                };
 

	
 
                let target = ctx.symbols.get_symbol_by_name_defined_in_scope(
 
                    SymbolScope::Module(module_root_id), symbol_identifier.value.as_bytes()
 
                );
 

	
 
                if target.is_none() {
 
                    return Err(ParseError::new_error_at_span(
 
                        source, symbol_identifier.span,
 
                        format!(
 
                            "could not find symbol '{}' within module '{}'",
 
                            symbol_identifier.value.as_str(), module_name.as_str()
 
                        )
 
                    ));
 
                }
 
                let target = target.unwrap();
 
                debug_assert_ne!(target.class(), SymbolClass::Module);
 
                let target_definition = target.variant.as_definition();
 

	
 
                Ok((
 
                    AliasedSymbol{
 
                        name: symbol_identifier,
 
                        alias: alias_identifier,
 
                        definition_id: target_definition.definition_id,
 
                    },
 
                    target_definition.clone()
 
                ))
 
            }
 

	
 
            let next = iter.next();
 

	
 
            if Some(TokenKind::Ident) == next {
 
                // Importing a single symbol
 
                iter.consume();
 
                let (imported_symbol, symbol_definition) = consume_symbol_and_maybe_alias(
 
                    &module.source, &mut iter, ctx, &module_identifier.value, target_root_id
 
                )?;
 

	
 
                let alias_identifier = match imported_symbol.alias.as_ref() {
 
                    Some(alias) => alias.clone(),
 
                    None => imported_symbol.name.clone(),
 
                };
 

	
 
                import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{
 
                    this,
 
                    span: InputSpan::from_positions(import_span.begin, alias_identifier.span.end),
 
                    module: module_identifier,
 
                    module_id: target_root_id,
 
                    symbols: vec![imported_symbol],
 
                }));
 
                if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
 
                    SymbolScope::Module(module.root_id),
 
                    Symbol{
 
                        name: alias_identifier.value,
 
                        variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
 
                    }
 
                ) {
 
                    return Err(construct_symbol_conflict_error(
 
                        modules, module_idx, ctx, &new_symbol, &old_symbol
 
                    ));
 
                }
 
            } else if Some(TokenKind::OpenCurly) == next {
 
                // Importing multiple symbols
 
                let mut end_of_list = iter.last_valid_pos();
 
                consume_comma_separated(
 
                    TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, &mut iter, ctx,
 
                    |source, iter, ctx| consume_symbol_and_maybe_alias(
 
                        source, iter, ctx, &module_identifier.value, target_root_id
 
                    ),
 
                    &mut self.found_symbols, "a symbol", "a list of symbols to import", Some(&mut end_of_list)
 
                )?;
 

	
 
                // Preallocate import
 
                import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols {
 
                    this,
 
                    span: InputSpan::from_positions(import_span.begin, end_of_list),
 
                    module: module_identifier,
 
                    module_id: target_root_id,
 
                    symbols: Vec::with_capacity(self.found_symbols.len()),
 
                }));
 

	
 
                // Fill import symbols while inserting symbols in the
 
                // appropriate scope in the symbol table.
 
                let import = ctx.heap[import_id].as_symbols_mut();
 

	
 
                for (imported_symbol, symbol_definition) in self.found_symbols.drain(..) {
 
                    let import_name = match imported_symbol.alias.as_ref() {
 
                        Some(import) => import.value.clone(),
 
                        None => imported_symbol.name.value.clone()
 
                    };
 

	
 
                    import.symbols.push(imported_symbol);
 
                    if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
 
                        SymbolScope::Module(module.root_id), Symbol{
 
                            name: import_name,
 
                            variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
 
                        }
 
                    ) {
 
                        return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
 
                    }
 
                }
 
            } else if Some(TokenKind::Star) == next {
 
                // Import all symbols from the module
 
                let star_span = iter.next_span();
 

	
 
                iter.consume();
 
                self.scoped_symbols.clear();
 
                let _found = ctx.symbols.get_all_symbols_defined_in_scope(
 
                    SymbolScope::Module(target_root_id),
 
                    &mut self.scoped_symbols
 
                );
 
                debug_assert!(_found); // even modules without symbols should have a scope
 

	
 
                // Preallocate import
 
                import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols{
 
                    this,
 
                    span: InputSpan::from_positions(import_span.begin, star_span.end),
 
                    module: module_identifier,
 
                    module_id: target_root_id,
 
                    symbols: Vec::with_capacity(self.scoped_symbols.len())
 
                }));
 

	
 
                // Fill import AST node and symbol table
 
                let import = ctx.heap[import_id].as_symbols_mut();
 

	
 
                for symbol in self.scoped_symbols.drain(..) {
 
                    let symbol_name = symbol.name;
 
                    match symbol.variant {
 
                        SymbolVariant::Definition(symbol_definition) => {
 
                            import.symbols.push(AliasedSymbol{
 
                                name: Identifier{ span: star_span, value: symbol_name.clone() },
 
                                alias: None,
 
                                definition_id: symbol_definition.definition_id,
 
                            });
 

	
 
                            if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(
 
                                SymbolScope::Module(module.root_id),
 
                                Symbol{
 
                                    name: symbol_name,
 
                                    variant: SymbolVariant::Definition(symbol_definition.into_imported(import_id))
 
                                }
 
                            ) {
 
                                return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
 
                            }
 
                        },
 
                        _ => unreachable!(),
 
                    }
 
                }
 
            } else {
 
                return Err(ParseError::new_error_str_at_pos(
 
                    &module.source, iter.last_valid_pos(), "expected symbol name, '{' or '*'"
 
                ));
 
            }
 
        } else {
 
            // Assume implicit alias
 
            let module_name_str = module_identifier.value.clone();
 
            let last_ident_start = module_name_str.as_str().rfind('.').map_or(0, |v| v + 1);
 
            let alias_text = &module_name_str.as_bytes()[last_ident_start..];
 
            let alias = ctx.pool.intern(alias_text);
 
            let alias_span = InputSpan::from_positions(
 
                module_name_span.begin.with_offset(last_ident_start as u32),
 
                module_name_span.end
 
            );
 
            let alias_identifier = Identifier{ span: alias_span, value: alias.clone() };
 

	
 
            import_id = ctx.heap.alloc_import(|this| Import::Module(ImportModule{
 
                this,
 
                span: InputSpan::from_positions(import_span.begin, module_identifier.span.end),
 
                module: module_identifier,
 
                alias: alias_identifier,
 
                module_id: target_root_id,
 
            }));
 
            if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{
 
                name: alias,
 
                variant: SymbolVariant::Module(SymbolModule{
 
                    root_id: target_root_id,
 
                    introduced_at: import_id
 
                })
 
            }) {
 
                return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol));
 
            }
 
        }
 

	
 
        // By now the `import_id` is set, just need to make sure that the import
 
        // properly ends with a semicolon
 
        consume_token(&module.source, &mut iter, TokenKind::SemiColon)?;
 
        self.imports.push(import_id);
 

	
 
        Ok(())
 
    }
 
}
src/protocol/parser/pass_symbols.rs
Show inline comments
 
use crate::protocol::ast::*;
 
use super::symbol_table::*;
 
use crate::protocol::input_source::{ParseError, InputSpan};
 
use super::tokens::*;
 
use super::token_parsing::*;
 
use super::{Module, ModuleCompilationPhase, PassCtx};
 

	
 
/// Scans the module and finds all module-level type definitions. These will be
 
/// added to the symbol table such that during AST-construction we know which
 
/// identifiers point to types. Will also parse all pragmas to determine module
 
/// names.
 
pub(crate) struct PassSymbols {
 
    symbols: Vec<Symbol>,
 
    pragmas: Vec<PragmaId>,
 
    imports: Vec<ImportId>,
 
    definitions: Vec<DefinitionId>,
 
    buffer: String,
 
    has_pragma_version: bool,
 
    has_pragma_module: bool,
 
}
 

	
 
impl PassSymbols {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            symbols: Vec::with_capacity(128),
 
            pragmas: Vec::with_capacity(8),
 
            imports: Vec::with_capacity(32),
 
            definitions: Vec::with_capacity(128),
 
            buffer: String::with_capacity(128),
 
            has_pragma_version: false,
 
            has_pragma_module: false,
 
        }
 
    }
 

	
 
    fn reset(&mut self) {
 
        self.symbols.clear();
 
        self.pragmas.clear();
 
        self.imports.clear();
 
        self.definitions.clear();
 
        self.has_pragma_version = false;
 
        self.has_pragma_module = false;
 
    }
 

	
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
 
        self.reset();
 

	
 
        let module = &mut modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 

	
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::Tokenized);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 
        debug_assert!(module.root_id.is_invalid()); // not set yet,
 

	
 
        // Preallocate root in the heap
 
        let root_id = ctx.heap.alloc_protocol_description(|this| {
 
            Root{
 
                this,
 
                pragmas: Vec::new(),
 
                imports: Vec::new(),
 
                definitions: Vec::new(),
 
            }
 
        });
 
        module.root_id = root_id;
 

	
 
        // Retrieve first range index, then make immutable borrow
 
        let mut range_idx = module_range.first_child_idx;
 
        let module = &modules[module_idx];
 

	
 
        // Visit token ranges to detect definitions and pragmas
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 
            let next_sibling_idx = cur_range.next_sibling_idx;
 
            let range_kind = cur_range.range_kind;
 

	
 
            // Parse if it is a definition or a pragma
 
            if range_kind == TokenRangeKind::Definition {
 
                self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
 
            } else if range_kind == TokenRangeKind::Pragma {
 
                self.visit_pragma_range(modules, module_idx, ctx, range_idx_usize)?;
 
            }
 

	
 
            match next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; },
 
            if next_sibling_idx == NO_SIBLING {
 
                break;
 
            } else {
 
                range_idx = next_sibling_idx;
 
            }
 
        }
 

	
 
        // Add the module's symbol scope and the symbols we just parsed
 
        let module_scope = SymbolScope::Module(root_id);
 
        ctx.symbols.insert_scope(None, module_scope);
 
        for symbol in self.symbols.drain(..) {
 
            if let Err((new_symbol, old_symbol)) = ctx.symbols.insert_symbol(module_scope, symbol) {
 
                return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, &old_symbol))
 
            }
 
        }
 

	
 
        // Modify the preallocated root
 
        let root = &mut ctx.heap[root_id];
 
        root.pragmas.extend(self.pragmas.drain(..));
 
        root.definitions.extend(self.definitions.drain(..));
 

	
 
        let module = &mut modules[module_idx];
 
        module.phase = ModuleCompilationPhase::SymbolsScanned;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_pragma_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // Consume pragma name
 
        let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
 

	
 
        // Consume pragma values
 
        if pragma_section == b"#module" {
 
            // Check if name is defined twice within the same file
 
            if self.has_pragma_module {
 
                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
 
            }
 

	
 
            // Consume the domain-name
 
            let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
 
            if iter.next().is_some() {
 
                return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
 
            }
 

	
 
            // Add to heap and symbol table
 
            let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
 
            let module_name = ctx.pool.intern(module_name);
 
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
 
                this,
 
                span: pragma_span,
 
                value: Identifier{ span: module_span, value: module_name.clone() },
 
            }));
 
            self.pragmas.push(pragma_id);
 

	
 
            if let Err(other_module_root_id) = ctx.symbols.insert_module(module_name, module.root_id) {
 
                // Naming conflict
 
                let this_module = &modules[module_idx];
 
                let other_module = seek_module(modules, other_module_root_id).unwrap();
 
                let other_module_pragma_id = other_module.name.as_ref().map(|v| (*v).0).unwrap();
 
                let other_pragma = ctx.heap[other_module_pragma_id].as_module();
 
                return Err(ParseError::new_error_str_at_span(
 
                    &this_module.source, pragma_span, "conflict in module name"
 
                ).with_info_str_at_span(
 
                    &other_module.source, other_pragma.span, "other module is defined here"
 
                ));
 
            }
 
            self.has_pragma_module = true;
 
        } else if pragma_section == b"#version" {
 
            // Check if version is defined twice within the same file
 
            if self.has_pragma_version {
 
                return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice"));
 
            }
 

	
 
            // Consume the version pragma
 
            let (version, version_span) = consume_integer_literal(&module.source, &mut iter, &mut self.buffer)?;
 
            let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Version(PragmaVersion{
 
                this,
 
                span: InputSpan::from_positions(pragma_start, version_span.end),
 
                version,
 
            }));
 
            self.pragmas.push(pragma_id);
 
            self.has_pragma_version = true;
 
        } else {
 
            // Custom pragma, maybe we support this in the future, but for now
 
            // we don't.
 
            return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name"));
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_definition_range(&mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let range = &module.tokens.ranges[range_idx];
 
        let definition_span = InputSpan::from_positions(
 
            module.tokens.start_pos(range),
 
            module.tokens.end_pos(range)
 
        );
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // First ident must be type of symbol
 
        let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
 

	
 
        // Retrieve identifier of definition
 
        let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
 
        let mut poly_vars = Vec::new();
 
        maybe_consume_comma_separated(
 
            TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
 
            |source, iter, ctx| consume_ident_interned(source, iter, ctx),
 
            &mut poly_vars, "a polymorphic variable", None
 
        )?;
 
        let ident_text = identifier.value.clone(); // because we need it later
 
        let ident_span = identifier.span.clone();
 

	
 
        // Reserve space in AST for definition and add it to the symbol table
 
        let definition_class;
 
        let ast_definition_id;
 
        match kw_text {
 
            KW_STRUCT => {
 
                let struct_def_id = ctx.heap.alloc_struct_definition(|this| {
 
                    StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
 
                });
 
                definition_class = DefinitionClass::Struct;
 
                ast_definition_id = struct_def_id.upcast();
 
            },
 
            KW_ENUM => {
 
                let enum_def_id = ctx.heap.alloc_enum_definition(|this| {
 
                    EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
 
                });
 
                definition_class = DefinitionClass::Enum;
 
                ast_definition_id = enum_def_id.upcast();
 
            },
 
            KW_UNION => {
 
                let union_def_id = ctx.heap.alloc_union_definition(|this| {
 
                    UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
 
                });
 
                definition_class = DefinitionClass::Union;
 
                ast_definition_id = union_def_id.upcast()
 
            },
 
            KW_FUNCTION => {
 
                let func_def_id = ctx.heap.alloc_function_definition(|this| {
 
                    FunctionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars)
 
                });
 
                definition_class = DefinitionClass::Function;
 
                ast_definition_id = func_def_id.upcast();
 
            },
 
            KW_PRIMITIVE | KW_COMPOSITE => {
 
                let component_variant = if kw_text == KW_PRIMITIVE {
 
                    ComponentVariant::Primitive
 
                } else {
 
                    ComponentVariant::Composite
 
                };
 
                let comp_def_id = ctx.heap.alloc_component_definition(|this| {
 
                    ComponentDefinition::new_empty(this, module.root_id, definition_span, component_variant, identifier, poly_vars)
 
                });
 
                definition_class = DefinitionClass::Component;
 
                ast_definition_id = comp_def_id.upcast();
 
            },
 
            _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(kw_text)),
 
        }
 

	
 
        let symbol = Symbol{
 
            name: ident_text,
 
            variant: SymbolVariant::Definition(SymbolDefinition{
 
                defined_in_module: module.root_id,
 
                defined_in_scope: SymbolScope::Module(module.root_id),
 
                definition_span,
 
                identifier_span: ident_span,
 
                imported_at: None,
 
                class: definition_class,
 
                definition_id: ast_definition_id,
 
            }),
 
        };
 
        self.symbols.push(symbol);
 
        self.definitions.push(ast_definition_id);
 

	
 
        Ok(())
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/pass_tokenizer.rs
Show inline comments
 
use crate::protocol::input_source::{
 
    InputSource as InputSource,
 
    ParseError,
 
    InputPosition as InputPosition,
 
};
 

	
 
use super::tokens::*;
 
use super::token_parsing::*;
 

	
 
/// Tokenizer is a reusable parser to tokenize multiple source files using the
 
/// same allocated buffers. In a well-formed program, we produce a consistent
 
/// tree of token ranges such that we may identify tokens that represent a
 
/// defintion or an import before producing the entire AST.
 
///
 
/// If the program is not well-formed then the tree may be inconsistent, but we
 
/// will detect this once we transform the tokens into the AST. To ensure a
 
/// consistent AST-producing phase we will require the import to have balanced
 
/// curly braces
 
pub(crate) struct PassTokenizer {
 
    // Stack of input positions of opening curly braces, used to detect
 
    // unmatched opening braces, unmatched closing braces are detected
 
    // immediately.
 
    curly_stack: Vec<InputPosition>,
 
    // Points to an element in the `TokenBuffer.ranges` variable.
 
    stack_idx: usize,
 
}
 

	
 
impl PassTokenizer {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            curly_stack: Vec::with_capacity(32),
 
            stack_idx: 0
 
        }
 
    }
 

	
 
    pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        // Assert source and buffer are at start
 
        debug_assert_eq!(source.pos().offset, 0);
 
        debug_assert!(target.tokens.is_empty());
 
        debug_assert!(target.ranges.is_empty());
 

	
 
        // Set up for tokenization by pushing the first range onto the stack.
 
        // This range may get transformed into the appropriate range kind later,
 
        // see `push_range` and `pop_range`.
 
        self.stack_idx = 0;
 
        target.ranges.push(TokenRange{
 
            parent_idx: 0,
 
            parent_idx: NO_RELATION,
 
            range_kind: TokenRangeKind::Module,
 
            curly_depth: 0,
 
            start: 0,
 
            end: 0,
 
            num_child_ranges: 0,
 
            first_child_idx: 0,
 
            last_child_idx: 0,
 
            next_sibling_idx: None,
 
            first_child_idx: NO_RELATION,
 
            last_child_idx: NO_RELATION,
 
            next_sibling_idx: NO_RELATION,
 
        });
 

	
 
        // Main tokenization loop
 
        while let Some(c) = source.next() {
 
            let token_index = target.tokens.len() as u32;
 

	
 
            if is_char_literal_start(c) {
 
                self.consume_char_literal(source, target)?;
 
            } else if is_string_literal_start(c) {
 
                self.consume_string_literal(source, target)?;
 
            } else if is_identifier_start(c) {
 
                let ident = self.consume_identifier(source, target)?;
 

	
 
                if demarks_definition(ident) {
 
                    self.push_range(target, TokenRangeKind::Definition, token_index);
 
                } else if demarks_import(ident) {
 
                    self.push_range(target, TokenRangeKind::Import, token_index);
 
                }
 
            } else if is_integer_literal_start(c) {
 
                self.consume_number(source, target)?;
 
            } else if is_pragma_start_or_pound(c) {
 
                let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
 
                if was_pragma {
 
                    self.push_range(target, TokenRangeKind::Pragma, token_index);
 
                }
 
            } else if self.is_line_comment_start(c, source) {
 
                self.consume_line_comment(source, target)?;
 
            } else if self.is_block_comment_start(c, source) {
 
                self.consume_block_comment(source, target)?;
 
            } else if is_whitespace(c) {
 
                let contained_newline = self.consume_whitespace(source);
 
                if contained_newline {
 
                    let range = &target.ranges[self.stack_idx];
 
                    if range.range_kind == TokenRangeKind::Pragma {
 
                        self.pop_range(target, target.tokens.len() as u32);
 
                    }
 
                }
 
            } else {
 
                let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
 
                if let Some((token, token_pos)) = was_punctuation {
 
                    if token == TokenKind::OpenCurly {
 
                        self.curly_stack.push(token_pos);
 
                    } else if token == TokenKind::CloseCurly {
 
                        // Check if this marks the end of a range we're
 
                        // currently processing
 
                        if self.curly_stack.is_empty() {
 
                            return Err(ParseError::new_error_str_at_pos(
 
                                source, token_pos, "unmatched closing curly brace '}'"
 
                            ));
 
                        }
 

	
 
                        self.curly_stack.pop();
 

	
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 

	
 
                        // Exit early if we have more closing curly braces than
 
                        // opening curly braces
 
                    } else if token == TokenKind::SemiColon {
 
                        // Check if this marks the end of an import
 
                        let range = &target.ranges[self.stack_idx];
 
                        if range.range_kind == TokenRangeKind::Import {
 
                            self.pop_range(target, target.tokens.len() as u32);
 
                        }
 
                    }
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        source, source.pos(), "unexpected character"
 
                    ));
 
                }
 
            }
 
        }
 

	
 
        // End of file, check if our state is correct
 
        if let Some(error) = source.had_error.take() {
 
            return Err(error);
 
        }
 

	
 
        if !self.curly_stack.is_empty() {
 
            // Let's not add a lot of heuristics and just tell the programmer
 
            // that something is wrong
 
            let last_unmatched_open = self.curly_stack.pop().unwrap();
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, last_unmatched_open, "unmatched opening curly brace '{'"
 
            ));
 
        }
 

	
 
        // Ranges that did not depend on curly braces may have missing tokens.
 
        // So close all of the active tokens
 
        while self.stack_idx != 0 {
 
            self.pop_range(target, target.tokens.len() as u32);
 
        }
 

	
 
        // And finally, we may have a token range at the end that doesn't belong
 
        // to a range yet, so insert a "code" range if this is the case.
 
        debug_assert_eq!(self.stack_idx, 0);
 
        let last_token_idx = target.tokens.len() as u32;
 
        if target.ranges[0].end != last_token_idx {
 

	
 
        }
 

	
 
        // TODO: @remove once I'm sure the algorithm works. For now it is better
 
        //  if the debugging is a little more expedient
 
        if cfg!(debug_assertions) {
 
            // For each range make sure its children make sense
 
            for parent_idx in 0..target.ranges.len() {
 
                let cur_range = &target.ranges[parent_idx];
 
                if cur_range.num_child_ranges == 0 {
 
                    assert_eq!(cur_range.first_child_idx, parent_idx as u32);
 
                    assert_eq!(cur_range.last_child_idx, parent_idx as u32);
 
                    assert_eq!(cur_range.first_child_idx, NO_RELATION);
 
                    assert_eq!(cur_range.last_child_idx, NO_RELATION);
 
                } else {
 
                    assert_ne!(cur_range.first_child_idx, parent_idx as u32);
 
                    assert_ne!(cur_range.last_child_idx, parent_idx as u32);
 
                    assert_ne!(cur_range.first_child_idx, NO_RELATION);
 
                    assert_ne!(cur_range.last_child_idx, NO_RELATION);
 

	
 
                    let mut child_counter = 0u32;
 
                    let last_child_idx = cur_range.first_child_idx;
 
                    let mut child_idx = Some(cur_range.first_child_idx);
 
                    while let Some(cur_child_idx) = child_idx {
 
                        let child_range = &target.ranges[cur_child_idx as usize];
 
                        assert_eq!(child_range.parent_idx, parent_idx);
 
                    let mut last_valid_child_idx = cur_range.first_child_idx;
 
                    let mut child_idx = cur_range.first_child_idx;
 
                    while child_idx != NO_RELATION {
 
                        let child_range = &target.ranges[child_idx as usize];
 
                        assert_eq!(child_range.parent_idx, parent_idx as i32);
 
                        last_valid_child_idx = child_idx;
 
                        child_idx = child_range.next_sibling_idx;
 
                        child_counter += 1;
 
                    }
 

	
 
                    assert_eq!(cur_range.last_child_idx, last_child_idx);
 
                    assert_eq!(cur_range.last_child_idx, last_valid_child_idx);
 
                    assert_eq!(cur_range.num_child_ranges, child_counter);
 
                }
 
            }
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn is_line_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
 
        return first_char == b'/' && Some(b'/') == source.lookahead(1);
 
    }
 

	
 
    fn is_block_comment_start(&self, first_char: u8, source: &InputSource) -> bool {
 
        return first_char == b'/' && Some(b'*') == source.lookahead(1);
 
    }
 

	
 
    fn maybe_parse_punctuation(
 
        &mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer
 
    ) -> Result<Option<(TokenKind, InputPosition)>, ParseError> {
 
        debug_assert!(first_char != b'#', "'#' needs special handling");
 
        debug_assert!(first_char != b'\'', "'\'' needs special handling");
 
        debug_assert!(first_char != b'"', "'\"' needs special handling");
 

	
 
        let pos = source.pos();
 
        let token_kind;
 
        if first_char == b'!' {
 
            source.consume();
 
            if Some(b'=') == source.next() {
 
                source.consume();
 
                token_kind = TokenKind::NotEqual;
 
            } else {
 
                token_kind = TokenKind::Exclamation;
 
            }
 
        } else if first_char == b'%' {
 
            source.consume();
 
            if Some(b'=') == source.next() {
 
                source.consume();
 
                token_kind = TokenKind::PercentEquals;
 
            } else {
 
                token_kind = TokenKind::Percent;
 
            }
 
        } else if first_char == b'&' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'&') == next {
 
                source.consume();
 
                token_kind = TokenKind::AndAnd;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::AndEquals;
 
            } else {
 
                token_kind = TokenKind::And;
 
            }
 
        } else if first_char == b'(' {
 
            source.consume();
 
            token_kind = TokenKind::OpenParen;
 
        } else if first_char == b')' {
 
            source.consume();
 
            token_kind = TokenKind::CloseParen;
 
        } else if first_char == b'*' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::StarEquals;
 
            } else {
 
                token_kind = TokenKind::Star;
 
            }
 
        } else if first_char == b'+' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'+') == next {
 
                source.consume();
 
                token_kind = TokenKind::PlusPlus;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::PlusEquals;
 
            } else {
 
                token_kind = TokenKind::Plus;
 
            }
 
        } else if first_char == b',' {
 
            source.consume();
 
            token_kind = TokenKind::Comma;
 
        } else if first_char == b'-' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'-') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusMinus;
 
            } else if Some(b'>') == next {
 
                source.consume();
 
                token_kind = TokenKind::ArrowRight;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusEquals;
 
            } else {
 
                token_kind = TokenKind::Minus;
 
            }
 
        } else if first_char == b'.' {
 
            source.consume();
 
            if let Some(b'.') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::DotDot;
 
            } else {
 
                token_kind = TokenKind::Dot
 
            }
 
        } else if first_char == b'/' {
 
            source.consume();
 
            debug_assert_ne!(Some(b'/'), source.next());
 
            debug_assert_ne!(Some(b'*'), source.next());
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::SlashEquals;
 
            } else {
 
                token_kind = TokenKind::Slash;
 
            }
 
        } else if first_char == b':' {
 
            source.consume();
 
            if let Some(b':') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::ColonColon;
 
            } else {
 
                token_kind = TokenKind::Colon;
 
            }
 
        } else if first_char == b';' {
 
            source.consume();
 
            token_kind = TokenKind::SemiColon;
 
        } else if first_char == b'<' {
 
            source.consume();
 
            let next = source.next();
 
            if let Some(b'<') = next {
 
                source.consume();
 
                if let Some(b'=') = source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftLeftEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftLeft;
 
                }
 
            } else if let Some(b'=') = next {
 
                source.consume();
 
                token_kind = TokenKind::LessEquals;
 
            } else {
 
                token_kind = TokenKind::OpenAngle;
 
            }
 
        } else if first_char == b'=' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::EqualEqual;
 
            } else {
 
                token_kind = TokenKind::Equal;
 
            }
 
        } else if first_char == b'>' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'>') == next {
 
                source.consume();
 
                if Some(b'=') == source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftRightEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftRight;
 
                }
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::GreaterEquals;
 
            } else {
 
                token_kind = TokenKind::CloseAngle;
 
            }
 
        } else if first_char == b'?' {
 
            source.consume();
 
            token_kind = TokenKind::Question;
 
        } else if first_char == b'@' {
 
            source.consume();
 
            token_kind = TokenKind::At;
 
        } else if first_char == b'[' {
 
            source.consume();
 
            token_kind = TokenKind::OpenSquare;
 
        } else if first_char == b']' {
 
            source.consume();
 
            token_kind = TokenKind::CloseSquare;
 
        } else if first_char == b'^' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::CaretEquals;
 
            } else {
 
                token_kind = TokenKind::Caret;
 
            }
 
        } else if first_char == b'{' {
 
            source.consume();
 
            token_kind = TokenKind::OpenCurly;
 
        } else if first_char == b'|' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'|') == next {
 
                source.consume();
 
                token_kind = TokenKind::OrOr;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::OrEquals;
 
            } else {
 
                token_kind = TokenKind::Or;
 
            }
 
        } else if first_char == b'}' {
 
            source.consume();
 
            token_kind = TokenKind::CloseCurly;
 
        } else if first_char == b'~' {
 
            source.consume();
 
            token_kind = TokenKind::Tilde;
 
        } else {
 
            self.check_ascii(source)?;
 
            return Ok(None);
 
        }
 

	
 
        target.tokens.push(Token::new(token_kind, pos));
 
        Ok(Some((token_kind, pos)))
 
    }
 

	
 
    fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading quote
 
        debug_assert!(source.next().unwrap() == b'\'');
 
        source.consume();
 

	
 
        let mut prev_char = b'\'';
 
        while let Some(c) = source.next() {
 
            if !c.is_ascii() {
 
                return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in char literal"));
 
            }
 
            source.consume();
 

	
 
            // Make sure ending quote was not escaped
 
            if c == b'\'' && prev_char != b'\\' {
 
                prev_char = c;
 
                break;
 
            }
 

	
 
            prev_char = c;
 
        }
 

	
 
        if prev_char != b'\'' {
 
            // Unterminated character literal, reached end of file.
 
            return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated character literal"));
 
        }
 

	
 
        let end_pos = source.pos();
 

	
 
        target.tokens.push(Token::new(TokenKind::Character, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_string_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading double quotes
 
        debug_assert!(source.next().unwrap() == b'"');
 
        source.consume();
 

	
 
        let mut prev_char = b'"';
 
        while let Some(c) = source.next() {
 
            if !c.is_ascii() {
 
                return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal"));
 
            }
 

	
 
            source.consume();
 
            if c == b'"' && prev_char != b'\\' {
 
                prev_char = c;
 
                break;
 
            }
 

	
 
            prev_char = c;
 
        }
 

	
 
        if prev_char != b'"' {
 
            // Unterminated string literal
 
            return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::String, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
 
        let start_pos = source.pos();
 
        debug_assert_eq!(first_char, b'#');
 
        source.consume();
 

	
 
        let next = source.next();
 
        if next.is_none() || !is_identifier_start(next.unwrap()) {
 
            // Just a pound sign
 
            target.tokens.push(Token::new(TokenKind::Pound, start_pos));
 
            Ok(false)
 
        } else {
 
            // Pound sign followed by identifier
 
            source.consume();
 
            while let Some(c) = source.next() {
 
                if !is_identifier_remaining(c) {
 
                    break;
 
                }
 
                source.consume();
 
            }
 

	
 
            self.check_ascii(source)?;
 

	
 
            let end_pos = source.pos();
 
            target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
 
            target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
            Ok(true)
 
        }
 
    }
 

	
 
    fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "//"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
 
        source.consume();
 
        source.consume();
 

	
 
        let mut prev_char = b'/';
 
        let mut cur_char = b'/';
 
        while let Some(c) = source.next() {
 
            prev_char = cur_char;
 
            cur_char = c;
 

	
 
            if c == b'\n' {
 
                // End of line, note that the newline is not consumed
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 

	
 
        let mut end_pos = source.pos();
 
        debug_assert_eq!(begin_pos.line, end_pos.line);
 

	
 
        // Modify offset to not include the newline characters
 
        if cur_char == b'\n' {
 
            if prev_char == b'\r' {
 
                end_pos.offset -= 2;
 
            } else {
 
                end_pos.offset -= 1;
 
            }
 
            // Consume final newline
 
            source.consume();
 
        } else {
 
            // End of comment was due to EOF
 
            debug_assert!(source.next().is_none())
 
        }
 

	
 
        target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "/*"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
 
        source.consume();
 
        source.consume();
 

	
 
        // Explicitly do not put prev_char at "*", because then "/*/" would
 
        // represent a valid and closed block comment
 
        let mut prev_char = b' ';
 
        let mut is_closed = false;
 
        while let Some(c) = source.next() {
 
            source.consume();
 
            if prev_char == b'*' && c == b'/' {
 
                // End of block comment
 
                is_closed = true;
 
                break;
 
            }
 
            prev_char = c;
 
        }
 

	
 
        if !is_closed {
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, source.pos(), "encountered unterminated block comment")
 
            );
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_identifier_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until no more identifier
 
        while let Some(c) = source.next() {
 
            if !is_identifier_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
        Ok(source.section_at_pos(begin_pos, end_pos))
 
    }
 

	
 
    fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_integer_literal_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until it doesn't look like a number anymore
 
        while let Some(c) = source.next() {
 
            if !maybe_number_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    // Consumes whitespace and returns whether or not the whitespace contained
 
    // a newline.
 
    fn consume_whitespace(&self, source: &mut InputSource) -> bool {
 
        debug_assert!(is_whitespace(source.next().unwrap()));
 

	
 
        let mut has_newline = false;
 
        while let Some(c) = source.next() {
 
            if !is_whitespace(c) {
 
                break;
 
            }
 

	
 
            if c == b'\n' {
 
                has_newline = true;
 
            }
 
            source.consume();
 
        }
 

	
 
        has_newline
 
    }
 

	
 
    /// Pushes a new token range onto the stack in the buffers.
 
    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token: u32) {
 
        let new_range_idx = target.ranges.len() as u32;
 
        let cur_range = &mut target.ranges[self.stack_idx];
 

	
 
        // If we have just popped a range and then push a new range, then the
 
        // first token is equal to the last token registered on the current
 
        // range. If not, then we had some intermediate tokens that did not
 
        // belong to a particular kind of token range: hence we insert an
 
        // intermediate "code" range.
 
        if cur_range.end != first_token {
 
            let code_start = cur_range.end;
 
    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
 
        let new_range_idx = target.ranges.len() as i32;
 
        let parent_idx = self.stack_idx as i32;
 
        let parent_range = &mut target.ranges[self.stack_idx];
 
        let curly_depth = self.curly_stack.len() as u32;
 

	
 
            if cur_range.first_child_idx == self.stack_idx as u32 {
 
                // The parent of the new "code" range we're going to push does
 
                // not have any registered children yet.
 
                cur_range.first_child_idx = new_range_idx;
 
        if parent_range.first_child_idx == NO_RELATION {
 
            parent_range.first_child_idx = new_range_idx;
 
        }
 
            cur_range.last_child_idx = new_range_idx + 1;
 

	
 
            cur_range.end = first_token;
 
            cur_range.num_child_ranges += 1;
 
        if parent_range.end != first_token_idx {
 
            // We popped a range, processed some intermediate tokens and now
 
            // enter a new range. Those intermediate tokens do not belong to a
 
            // particular range yet. So we put them in a "code" range.
 

	
 
            // Remember last sibling from parent (if any)
 
            let sibling_idx = parent_range.last_child_idx;
 

	
 
            // Push the code range
 
            let code_start_idx = parent_range.end;
 
            let code_end_idx = first_token_idx;
 

	
 
            parent_range.last_child_idx = new_range_idx;
 
            parent_range.end = code_end_idx;
 
            parent_range.num_child_ranges += 1;
 

	
 
            target.ranges.push(TokenRange{
 
                parent_idx: self.stack_idx,
 
                parent_idx,
 
                range_kind: TokenRangeKind::Code,
 
                curly_depth: self.curly_stack.len() as u32,
 
                start: code_start,
 
                end: first_token,
 
                curly_depth,
 
                start: code_start_idx,
 
                end: code_end_idx,
 
                num_child_ranges: 0,
 
                first_child_idx: new_range_idx,
 
                last_child_idx: new_range_idx,
 
                next_sibling_idx: Some(new_range_idx + 1), // we're going to push this thing next
 
                first_child_idx: NO_RELATION,
 
                last_child_idx: NO_RELATION,
 
                next_sibling_idx: new_range_idx + 1, // we're going to push this range below
 
            });
 
        } else {
 
            // We're going to push the range in the code below, but while we
 
            // have the `cur_range` borrowed mutably, we fix up its children
 
            // indices.
 
            if cur_range.first_child_idx == self.stack_idx as u32 {
 
                cur_range.first_child_idx = new_range_idx;
 

	
 
            // Fix up the sibling indices
 
            if sibling_idx != NO_RELATION {
 
                let sibling_range = &mut target.ranges[sibling_idx as usize];
 
                sibling_range.next_sibling_idx = new_range_idx;
 
            }
 
            cur_range.last_child_idx = new_range_idx;
 
        }
 

	
 
        // Insert a new range
 
        let parent_idx = self.stack_idx;
 
        // Push the new range
 
        self.stack_idx = target.ranges.len();
 
        let new_range_idx = self.stack_idx as u32;
 
        target.ranges.push(TokenRange{
 
            parent_idx,
 
            range_kind,
 
            curly_depth: self.curly_stack.len() as u32,
 
            start: first_token,
 
            end: first_token,
 
            curly_depth,
 
            start: first_token_idx,
 
            end: first_token_idx, // modified when popped
 
            num_child_ranges: 0,
 
            first_child_idx: new_range_idx,
 
            last_child_idx: new_range_idx,
 
            next_sibling_idx: None,
 
        });
 
            first_child_idx: NO_RELATION,
 
            last_child_idx: NO_RELATION,
 
            next_sibling_idx: NO_RELATION
 
        })
 
    }
 

	
 
    fn pop_range(&mut self, target: &mut TokenBuffer, end_index: u32) {
 
        let last = &mut target.ranges[self.stack_idx];
 
        debug_assert!(self.stack_idx != last.parent_idx, "attempting to pop top-level range");
 
    fn pop_range(&mut self, target: &mut TokenBuffer, end_token_idx: u32) {
 
        let popped_idx = self.stack_idx as i32;
 
        let popped_range = &mut target.ranges[self.stack_idx];
 
        debug_assert!(self.stack_idx != 0, "attempting to pop top-level range");
 

	
 
        // Fix up the current range before going back to parent
 
        last.end = end_index;
 
        debug_assert_ne!(last.start, end_index);
 
        popped_range.end = end_token_idx;
 
        debug_assert_ne!(popped_range.start, end_token_idx);
 

	
 
        // Go back to parent
 
        self.stack_idx = last.parent_idx;
 
        // Go back to parent and fix up its child pointers, but remember the
 
        // last child, so we can link it to the newly popped range.
 
        self.stack_idx = popped_range.parent_idx as usize;
 
        let parent = &mut target.ranges[self.stack_idx];
 
        parent.end = end_index;
 
        if parent.first_child_idx == NO_RELATION {
 
            parent.first_child_idx = popped_idx;
 
        }
 
        let prev_sibling_idx = parent.last_child_idx;
 
        parent.last_child_idx = popped_idx;
 
        parent.end = end_token_idx;
 
        parent.num_child_ranges += 1;
 

	
 
        // Fix up the sibling (if it exists)
 
        if prev_sibling_idx != NO_RELATION {
 
            let sibling = &mut target.ranges[prev_sibling_idx as usize];
 
            sibling.next_sibling_idx = popped_idx;
 
        }
 
    }
 

	
 

	
 
    fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> {
 
        match source.next() {
 
            Some(c) if !c.is_ascii() => {
 
                Err(ParseError::new_error_str_at_pos(source, source.pos(), "encountered a non-ASCII character"))
 
            },
 
            _else => {
 
                Ok(())
 
            },
 
        }
 
    }
 
}
 

	
 
// Helpers for characters
 
fn demarks_definition(ident: &[u8]) -> bool {
 
    return
 
        ident == KW_STRUCT ||
 
            ident == KW_ENUM ||
 
            ident == KW_UNION ||
 
            ident == KW_FUNCTION ||
 
            ident == KW_PRIMITIVE ||
 
            ident == KW_COMPOSITE
 
}
 

	
 
fn demarks_import(ident: &[u8]) -> bool {
 
    return ident == KW_IMPORT;
 
}
 

	
 
fn is_whitespace(c: u8) -> bool {
 
    c.is_ascii_whitespace()
 
}
 

	
 
fn is_char_literal_start(c: u8) -> bool {
 
    return c == b'\'';
 
}
 

	
 
fn is_string_literal_start(c: u8) -> bool {
 
    return c == b'"';
 
}
 

	
 
fn is_pragma_start_or_pound(c: u8) -> bool {
 
    return c == b'#';
 
}
 

	
 
fn is_identifier_start(c: u8) -> bool {
 
    return
 
        (c >= b'a' && c <= b'z') ||
 
            (c >= b'A' && c <= b'Z') ||
 
            c == b'_'
 
}
 

	
 
fn is_identifier_remaining(c: u8) -> bool {
 
    return
 
        (c >= b'0' && c <= b'9') ||
 
            (c >= b'a' && c <= b'z') ||
 
            (c >= b'A' && c <= b'Z') ||
 
            c == b'_'
 
}
 

	
 
fn is_integer_literal_start(c: u8) -> bool {
 
    return c >= b'0' && c <= b'9';
 
}
 

	
 
fn maybe_number_remaining(c: u8) -> bool {
 
    return
 
        (c == b'b' || c == b'B' || c == b'o' || c == b'O' || c == b'x' || c == b'X') ||
 
            (c >= b'0' && c <= b'9') ||
 
            c == b'_';
 
}
 

	
 
#[cfg(test)]
 
mod tests {
 
    use super::*;
 

	
 
    // TODO: Remove at some point
 
    #[test]
 
    fn test_tokenizer() {
 
        let mut source = InputSource::new_test("
 

	
 
        #version 500
 
        # hello 2
 

	
 
        import std.reo::*;
 

	
 
        struct Thing {
 
            int a: 5,
 
        }
 
        enum Hello {
 
            A,
 
            B
 
        }
 

	
 
        // Hello hello, is it me you are looking for?
 
        // I can seee it in your eeeyes
 

	
 
        func something(int a, int b, int c) -> byte {
 
            int a = 5;
 
            struct Inner {
 
                int a
 
            }
 
            struct City {
 
                int b
 
            }
 
            /* Waza
 
            How are you doing
 
            Things in here yo
 
            /* */ */
 

	
 
            a = a + 5 * 2;
 
            struct Pressure {
 
                int d
 
            }
 
        }
 
        ");
 
        let mut t = PassTokenizer::new();
 
        let mut buffer = TokenBuffer::new();
 
        t.tokenize(&mut source, &mut buffer).expect("tokenize");
 

	
 
        println!("Ranges:\n");
 
        for (idx, range) in buffer.ranges.iter().enumerate() {
 
            println!("[{}] {:?}", idx, range)
 
        }
 

	
 
        println!("Tokens:\n");
 
        let mut iter = buffer.tokens.iter().enumerate();
 
        while let Some((idx, token)) = iter.next() {
 
            match token.kind {
 
                TokenKind::Ident | TokenKind::Pragma | TokenKind::Integer |
 
                TokenKind::String | TokenKind::Character | TokenKind::LineComment |
 
                TokenKind::BlockComment => {
 
                    let (_, end) = iter.next().unwrap();
 
                    println!("[{}] {:?} ......", idx, token.kind);
 
                    assert_eq!(end.kind, TokenKind::SpanEnd);
 
                    let text = source.section_at_pos(token.pos, end.pos);
 
                    println!("{}", String::from_utf8_lossy(text));
 
                },
 
                _ => {
 
                    println!("[{}] {:?}", idx, token.kind);
 
                }
 
            }
 
        }
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/pass_typing.rs
Show inline comments
 
/// pass_typing
 
///
 
/// Performs type inference and type checking. Type inference is implemented by
 
/// applying constraints on (sub)trees of types. During this process the
 
/// resolver takes the `ParserType` structs (the representation of the types
 
/// written by the programmer), converts them to `InferenceType` structs (the
 
/// temporary data structure used during type inference) and attempts to arrive
 
/// at `ConcreteType` structs (the representation of a fully checked and
 
/// validated type).
 
///
 
/// The resolver will visit every statement and expression relevant to the
 
/// procedure and insert and determine its initial type based on context (e.g. a
 
/// return statement's expression must match the function's return type, an
 
/// if statement's test expression must evaluate to a boolean). When all are
 
/// visited we attempt to make progress in evaluating the types. Whenever a type
 
/// is progressed we queue the related expressions for further type progression.
 
/// Once no more expressions are in the queue the algorithm is finished. At this
 
/// point either all types are inferred (or can be trivially implicitly
 
/// determined), or we have incomplete types. In the latter casee we return an
 
/// error.
 
///
 
/// Inference may be applied on non-polymorphic procedures and on polymorphic
 
/// procedures. When dealing with a non-polymorphic procedure we apply the type
 
/// resolver and annotate the AST with the `ConcreteType`s. When dealing with
 
/// polymorphic procedures we will only annotate the AST once, preserving
 
/// references to polymorphic variables. Any later pass will perform just the
 
/// type checking.
 
///
 
/// TODO: Needs a thorough rewrite:
 
///  0. polymorph_progress is intentionally broken at the moment.
 
///  1. For polymorphic type inference we need to have an extra datastructure
 
///     for progressing the polymorphic variables and mapping them back to each
 
///     signature type that uses that polymorphic type. The two types of markers
 
///     became somewhat of a mess.
 
///  2. We're doing a lot of extra work. It seems better to apply the initial
 
///     type based on expression parents, then to apply forced constraints (arg
 
///     to a fires() call must be port-like), only then to start progressing the
 
///     types.
 
///     Furthermore, queueing of expressions can be more intelligent, currently
 
///     every child/parent of an expression is inferred again when queued. Hence
 
///     we need to queue only specific children/parents of expressions.
 
///  3. Remove the `msg` type?
 
///  4. Disallow certain types in certain operations (e.g. `Void`).
 
///  5. Implement implicit and explicit casting.
 
///  6. Investigate different ways of performing the type-on-type inference,
 
///     maybe there is a better way then flattened trees + markers?
 

	
 
macro_rules! debug_log {
 
    ($format:literal) => {
 
        enabled_debug_print!(true, "types", $format);
 
        enabled_debug_print!(false, "types", $format);
 
    };
 
    ($format:literal, $($args:expr),*) => {
 
        enabled_debug_print!(true, "types", $format, $($args),*);
 
        enabled_debug_print!(false, "types", $format, $($args),*);
 
    };
 
}
 

	
 
use std::collections::{HashMap, HashSet};
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::ParseError;
 
use crate::protocol::parser::ModuleCompilationPhase;
 
use crate::protocol::parser::type_table::*;
 
use crate::protocol::parser::token_parsing::*;
 
use super::visitor::{
 
    STMT_BUFFER_INIT_CAPACITY,
 
    EXPR_BUFFER_INIT_CAPACITY,
 
    Ctx,
 
    Visitor2,
 
    VisitorResult
 
};
 
use std::collections::hash_map::Entry;
 

	
 
const MESSAGE_TEMPLATE: [InferenceTypePart; 2] = [ InferenceTypePart::Message, InferenceTypePart::UInt8 ];
 
const BOOL_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::Bool ];
 
const CHARACTER_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::Character ];
 
const STRING_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::String ];
 
const NUMBERLIKE_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::NumberLike ];
 
const INTEGERLIKE_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::IntegerLike ];
 
const ARRAY_TEMPLATE: [InferenceTypePart; 2] = [ InferenceTypePart::Array, InferenceTypePart::Unknown ];
 
const ARRAYLIKE_TEMPLATE: [InferenceTypePart; 2] = [ InferenceTypePart::ArrayLike, InferenceTypePart::Unknown ];
 

	
 
/// TODO: @performance Turn into PartialOrd+Ord to simplify checks
 
/// TODO: @types Remove the Message -> Byte hack at some point...
 
#[derive(Debug, Clone, Eq, PartialEq)]
 
pub(crate) enum InferenceTypePart {
 
    // A marker with an identifier which we can use to retrieve the type subtree
 
    // that follows the marker. This is used to perform type inference on
 
    // polymorphs: an expression may determine the polymorphs type, after we
 
    // need to apply that information to all other places where the polymorph is
 
    // used.
 
    MarkerDefinition(usize), // marker for polymorph types on a procedure's definition
 
    MarkerBody(usize), // marker for polymorph types within a procedure body
 
    // Completely unknown type, needs to be inferred
 
    Unknown,
 
    // Partially known type, may be inferred to to be the appropriate related 
 
    // type.
 
    // IndexLike,      // index into array/slice
 
    NumberLike,     // any kind of integer/float
 
    IntegerLike,    // any kind of integer
 
    ArrayLike,      // array or slice. Note that this must have a subtype
 
    PortLike,       // input or output port
 
    // Special types that cannot be instantiated by the user
 
    Void, // For builtin functions that do not return anything
 
    // Concrete types without subtypes
 
    Bool,
 
    UInt8,
 
    UInt16,
 
    UInt32,
 
    UInt64,
 
    SInt8,
 
    SInt16,
 
    SInt32,
 
    SInt64,
 
    Character,
 
    String,
 
    // One subtype
 
    Message,
 
    Array,
 
    Slice,
 
    Input,
 
    Output,
 
    // A user-defined type with any number of subtypes
 
    Instance(DefinitionId, usize)
 
}
 

	
 
impl InferenceTypePart {
 
    fn is_marker(&self) -> bool {
 
        use InferenceTypePart as ITP;
 

	
 
        match self {
 
            ITP::MarkerDefinition(_) | ITP::MarkerBody(_) => true,
 
            _ => false,
 
        }
 
    }
 

	
 
    /// Checks if the type is concrete, markers are interpreted as concrete
 
    /// types.
 
    fn is_concrete(&self) -> bool {
 
        use InferenceTypePart as ITP;
 
        match self {
 
            ITP::Unknown | ITP::NumberLike | ITP::IntegerLike | 
 
            ITP::ArrayLike | ITP::PortLike => false,
 
            _ => true
 
        }
 
    }
 

	
 
    fn is_concrete_number(&self) -> bool {
 
        // TODO: @float
 
        use InferenceTypePart as ITP;
 
        match self {
 
            ITP::UInt8 | ITP::UInt16 | ITP::UInt32 | ITP::UInt64 |
 
            ITP::SInt8 | ITP::SInt16 | ITP::SInt32 | ITP::SInt64 => true,
 
            _ => false,
 
        }
 
    }
 

	
 
    fn is_concrete_integer(&self) -> bool {
 
        use InferenceTypePart as ITP;
 
        match self {ITP::UInt8 | ITP::UInt16 | ITP::UInt32 | ITP::UInt64 |
 
        ITP::SInt8 | ITP::SInt16 | ITP::SInt32 | ITP::SInt64 => true,
 
            _ => false,
 
        }
 
    }
 

	
 
    fn is_concrete_msg_array_or_slice(&self) -> bool {
 
        use InferenceTypePart as ITP;
 
        match self {
 
            ITP::Array | ITP::Slice | ITP::Message => true,
 
            _ => false,
 
        }
 
    }
 

	
 
    fn is_concrete_port(&self) -> bool {
 
        use InferenceTypePart as ITP;
 
        match self {
 
            ITP::Input | ITP::Output => true,
 
            _ => false,
 
        }
 
    }
 

	
 
    /// Checks if a part is less specific than the argument. Only checks for 
 
    /// single-part inference (i.e. not the replacement of an `Unknown` variant 
 
    /// with the argument)
 
    fn may_be_inferred_from(&self, arg: &InferenceTypePart) -> bool {
 
        use InferenceTypePart as ITP;
 

	
 
        (*self == ITP::IntegerLike && arg.is_concrete_integer()) ||
 
        (*self == ITP::NumberLike && (arg.is_concrete_number() || *arg == ITP::IntegerLike)) ||
 
        (*self == ITP::ArrayLike && arg.is_concrete_msg_array_or_slice()) ||
 
        (*self == ITP::PortLike && arg.is_concrete_port())
 
    }
 

	
 
    /// Returns the change in "iteration depth" when traversing this particular
 
    /// part. The iteration depth is used to traverse the tree in a linear 
 
    /// fashion. It is basically `number_of_subtypes - 1`
 
    fn depth_change(&self) -> i32 {
 
        use InferenceTypePart as ITP;
 
        match &self {
 
            ITP::Unknown | ITP::NumberLike | ITP::IntegerLike |
 
            ITP::Void | ITP::Bool |
 
            ITP::UInt8 | ITP::UInt16 | ITP::UInt32 | ITP::UInt64 |
 
            ITP::SInt8 | ITP::SInt16 | ITP::SInt32 | ITP::SInt64 |
 
            ITP::Character | ITP::String => {
 
                -1
 
            },
 
            ITP::MarkerDefinition(_) | ITP::MarkerBody(_) |
 
            ITP::ArrayLike | ITP::Message | ITP::Array | ITP::Slice |
 
            ITP::PortLike | ITP::Input | ITP::Output => {
 
                // One subtype, so do not modify depth
 
                0
 
            },
 
            ITP::Instance(_, num_args) => {
 
                (*num_args as i32) - 1
 
            }
 
        }
 
    }
 
}
 

	
 
impl From<ConcreteTypePart> for InferenceTypePart {
 
    fn from(v: ConcreteTypePart) -> InferenceTypePart {
 
        use ConcreteTypePart as CTP;
 
        use InferenceTypePart as ITP;
 

	
 
        match v {
 
            CTP::Marker(_) => {
 
                unreachable!("encountered marker while converting concrete type to inferred type");
 
            }
 
            CTP::Void => ITP::Void,
 
            CTP::Message => ITP::Message,
 
            CTP::Bool => ITP::Bool,
 
            CTP::UInt8 => ITP::UInt8,
 
            CTP::UInt16 => ITP::UInt16,
 
            CTP::UInt32 => ITP::UInt32,
 
            CTP::UInt64 => ITP::UInt64,
 
            CTP::SInt8 => ITP::SInt8,
 
            CTP::SInt16 => ITP::SInt16,
 
            CTP::SInt32 => ITP::SInt32,
 
            CTP::SInt64 => ITP::SInt64,
 
            CTP::Character => ITP::Character,
 
            CTP::String => ITP::String,
 
            CTP::Array => ITP::Array,
 
            CTP::Slice => ITP::Slice,
 
            CTP::Input => ITP::Input,
 
            CTP::Output => ITP::Output,
 
            CTP::Instance(id, num) => ITP::Instance(id, num),
 
        }
 
    }
 
}
 

	
 
#[derive(Debug)]
 
struct InferenceType {
 
    has_body_marker: bool,
 
    is_done: bool,
 
    parts: Vec<InferenceTypePart>,
 
}
 

	
 
impl InferenceType {
 
    /// Generates a new InferenceType. The two boolean flags will be checked in
 
    /// debug mode.
 
    fn new(has_body_marker: bool, is_done: bool, parts: Vec<InferenceTypePart>) -> Self {
 
        if cfg!(debug_assertions) {
 
            debug_assert!(!parts.is_empty());
 
            let parts_body_marker = parts.iter().any(|v| {
 
                if let InferenceTypePart::MarkerBody(_) = v { true } else { false }
 
            });
 
            debug_assert_eq!(has_body_marker, parts_body_marker);
 
            let parts_done = parts.iter().all(|v| v.is_concrete());
 
            debug_assert_eq!(is_done, parts_done, "{:?}", parts);
 
        }
 
        Self{ has_body_marker, is_done, parts }
 
    }
 

	
 
    /// Replaces a type subtree with the provided subtree. The caller must make
 
    /// sure the the replacement is a well formed type subtree.
 
    fn replace_subtree(&mut self, start_idx: usize, with: &[InferenceTypePart]) {
 
        let end_idx = Self::find_subtree_end_idx(&self.parts, start_idx);
 
        debug_assert_eq!(with.len(), Self::find_subtree_end_idx(with, 0));
 
        self.parts.splice(start_idx..end_idx, with.iter().cloned());
 
        self.recompute_is_done();
 
    }
 

	
 
    // TODO: @performance, might all be done inline in the type inference methods
 
    fn recompute_is_done(&mut self) {
 
        self.is_done = self.parts.iter().all(|v| v.is_concrete());
 
    }
 

	
 
    /// Seeks a body marker starting at the specified position. If a marker is
 
    /// found then its value and the index of the type subtree that follows it
 
    /// is returned.
 
    fn find_body_marker(&self, mut start_idx: usize) -> Option<(usize, usize)> {
 
        while start_idx < self.parts.len() {
 
            if let InferenceTypePart::MarkerBody(marker) = &self.parts[start_idx] {
 
                return Some((*marker, start_idx + 1))
 
            }
 

	
 
            start_idx += 1;
 
        }
 

	
 
        None
 
    }
 

	
 
    /// Returns an iterator over all body markers and the partial type tree that
 
    /// follows those markers. If it is a problem that `InferenceType` is 
 
    /// borrowed by the iterator, then use `find_body_marker`.
 
    fn body_marker_iter(&self) -> InferenceTypeMarkerIter {
 
        InferenceTypeMarkerIter::new(&self.parts)
 
    }
 

	
 
    /// Given that the `parts` are a depth-first serialized tree of types, this
 
    /// function finds the subtree anchored at a specific node. The returned 
 
    /// index is exclusive.
 
    fn find_subtree_end_idx(parts: &[InferenceTypePart], start_idx: usize) -> usize {
 
        let mut depth = 1;
 
        let mut idx = start_idx;
 

	
 
        while idx < parts.len() {
 
            depth += parts[idx].depth_change();
 
            if depth == 0 {
 
                return idx + 1;
 
            }
 
            idx += 1;
 
        }
 

	
 
        // If here, then the inference type is malformed
 
        unreachable!();
 
    }
 

	
 
    /// Call that attempts to infer the part at `to_infer.parts[to_infer_idx]` 
 
    /// using the subtree at `template.parts[template_idx]`. Will return 
 
    /// `Some(depth_change_due_to_traversal)` if type inference has been 
 
    /// applied. In this case the indices will also be modified to point to the 
 
    /// next part in both templates. If type inference has not (or: could not) 
 
    /// be applied then `None` will be returned. Note that this might mean that 
 
    /// the types are incompatible.
 
    ///
 
    /// As this is a helper functions, some assumptions: the parts are not 
 
    /// exactly equal, and neither of them contains a marker. Also: only the
 
    /// `to_infer` parts are checked for inference. It might be that this 
 
    /// function returns `None`, but that that `template` is still compatible
 
    /// with `to_infer`, e.g. when `template` has an `Unknown` part.
 
    fn infer_part_for_single_type(
 
        to_infer: &mut InferenceType, to_infer_idx: &mut usize,
 
        template_parts: &[InferenceTypePart], template_idx: &mut usize,
 
    ) -> Option<i32> {
 
        use InferenceTypePart as ITP;
 

	
 
        let to_infer_part = &to_infer.parts[*to_infer_idx];
 
        let template_part = &template_parts[*template_idx];
 

	
 
        // TODO: Maybe do this differently?
 
        let mut template_definition_marker = None;
 
        if *template_idx > 0 {
 
            if let ITP::MarkerDefinition(marker) = &template_parts[*template_idx - 1] {
 
                template_definition_marker = Some(*marker)
 
            }
 
        }
 

	
 
        // Check for programmer mistakes
 
        debug_assert_ne!(to_infer_part, template_part);
 
        debug_assert!(!to_infer_part.is_marker(), "marker encountered in 'infer part'");
 
        debug_assert!(!template_part.is_marker(), "marker encountered in 'template part'");
 

	
 
        // Inference of a somewhat-specified type
 
        if to_infer_part.may_be_inferred_from(template_part) {
 
            let depth_change = to_infer_part.depth_change();
 
            debug_assert_eq!(depth_change, template_part.depth_change());
 

	
 
            if let Some(marker) = template_definition_marker {
 
                to_infer.parts.insert(*to_infer_idx, ITP::MarkerDefinition(marker));
 
                *to_infer_idx += 1;
 
            }
 

	
 
            to_infer.parts[*to_infer_idx] = template_part.clone();
 

	
 
            *to_infer_idx += 1;
 
            *template_idx += 1;
 
            return Some(depth_change);
 
        }
 

	
 
        // Inference of a completely unknown type
 
        if *to_infer_part == ITP::Unknown {
 
            // template part is different, so cannot be unknown, hence copy the
 
            // entire subtree. Make sure to copy definition markers, but not to
 
            // transfer polymorph markers.
 
            let template_end_idx = Self::find_subtree_end_idx(template_parts, *template_idx);
 
            let template_start_idx = if let Some(marker) = template_definition_marker {
 
                to_infer.parts[*to_infer_idx] = ITP::MarkerDefinition(marker);
 
                *template_idx
 
            } else {
 
                to_infer.parts[*to_infer_idx] = template_parts[*template_idx].clone();
 
                *template_idx + 1
 
            };
 
            *to_infer_idx += 1;
 

	
 
            for template_idx in template_start_idx..template_end_idx {
 
                let template_part = &template_parts[template_idx];
 
                if let ITP::MarkerBody(_) = template_part {
 
                    // Do not copy this one
 
                } else {
 
                    to_infer.parts.insert(*to_infer_idx, template_part.clone());
 
                    *to_infer_idx += 1;
 
                }
 
            }
 
            *template_idx = template_end_idx;
 

	
 
            // Note: by definition the LHS was Unknown and the RHS traversed a 
 
            // full subtree.
 
            return Some(-1);
 
        }
 

	
 
        None
 
    }
 

	
 
    /// Call that checks if the `to_check` part is compatible with the `infer`
 
    /// part. This is essentially a copy of `infer_part_for_single_type`, but
 
    /// without actually copying the type parts.
 
    fn check_part_for_single_type(
 
        to_check_parts: &[InferenceTypePart], to_check_idx: &mut usize,
 
        template_parts: &[InferenceTypePart], template_idx: &mut usize
 
    ) -> Option<i32> {
 
        use InferenceTypePart as ITP;
 

	
 
        let to_check_part = &to_check_parts[*to_check_idx];
 
        let template_part = &template_parts[*template_idx];
 

	
 
        // Checking programmer errors
 
        debug_assert_ne!(to_check_part, template_part);
 
        debug_assert!(!to_check_part.is_marker(), "marker encountered in 'to_check part'");
 
        debug_assert!(!template_part.is_marker(), "marker encountered in 'template part'");
 

	
 
        if to_check_part.may_be_inferred_from(template_part) {
 
            let depth_change = to_check_part.depth_change();
 
            debug_assert_eq!(depth_change, template_part.depth_change());
 
            *to_check_idx += 1;
 
            *template_idx += 1;
 
            return Some(depth_change);
 
        }
 

	
 
        if *to_check_part == ITP::Unknown {
 
            *to_check_idx += 1;
 
            *template_idx = Self::find_subtree_end_idx(template_parts, *template_idx);
 

	
 
            // By definition LHS and RHS had depth change of -1
 
            return Some(-1);
 
        }
 

	
 
        None
 
    }
 

	
 
    /// Attempts to infer types between two `InferenceType` instances. This 
 
    /// function is unsafe as it accepts pointers to work around Rust's 
 
    /// borrowing rules. The caller must ensure that the pointers are distinct.
 
    unsafe fn infer_subtrees_for_both_types(
 
        type_a: *mut InferenceType, start_idx_a: usize,
 
        type_b: *mut InferenceType, start_idx_b: usize
 
    ) -> DualInferenceResult {
 
        debug_assert!(!std::ptr::eq(type_a, type_b), "encountered pointers to the same inference type");
 
        let type_a = &mut *type_a;
 
        let type_b = &mut *type_b;
 

	
 
        let mut modified_a = false;
 
        let mut modified_b = false;
 
        let mut idx_a = start_idx_a;
 
        let mut idx_b = start_idx_b;
 
        let mut depth = 1;
 

	
 
        while depth > 0 {
 
            // Advance indices if we encounter markers or equal parts
 
            let part_a = &type_a.parts[idx_a];
 
            let part_b = &type_b.parts[idx_b];
 
            
 
            if part_a == part_b {
 
                let depth_change = part_a.depth_change();
 
                depth += depth_change;
 
                debug_assert_eq!(depth_change, part_b.depth_change());
 
                idx_a += 1;
 
                idx_b += 1;
 
                continue;
 
            }
 
            if part_a.is_marker() { idx_a += 1; continue; }
 
            if part_b.is_marker() { idx_b += 1; continue; }
 

	
 
            // Types are not equal and are both not markers
 
            if let Some(depth_change) = Self::infer_part_for_single_type(type_a, &mut idx_a, &type_b.parts, &mut idx_b) {
 
                depth += depth_change;
 
                modified_a = true;
 
                continue;
 
            }
 
            if let Some(depth_change) = Self::infer_part_for_single_type(type_b, &mut idx_b, &type_a.parts, &mut idx_a) {
 
                depth += depth_change;
 
                modified_b = true;
 
                continue;
 
            }
 

	
 
            // And can also not be inferred in any way: types must be incompatible
 
            return DualInferenceResult::Incompatible;
 
        }
 

	
 
        if modified_a { type_a.recompute_is_done(); }
 
        if modified_b { type_b.recompute_is_done(); }
 

	
 
        // If here then we completely inferred the subtrees.
 
        match (modified_a, modified_b) {
 
            (false, false) => DualInferenceResult::Neither,
 
            (false, true) => DualInferenceResult::Second,
 
            (true, false) => DualInferenceResult::First,
 
            (true, true) => DualInferenceResult::Both
 
        }
 
    }
 

	
 
    /// Attempts to infer the first subtree based on the template. Like
 
    /// `infer_subtrees_for_both_types`, but now only applying inference to
 
    /// `to_infer` based on the type information in `template`.
 
    /// Secondary use is to make sure that a type follows a certain template.
 
    fn infer_subtree_for_single_type(
 
        to_infer: &mut InferenceType, mut to_infer_idx: usize,
 
        template: &[InferenceTypePart], mut template_idx: usize,
 
    ) -> SingleInferenceResult {
 
        let mut modified = false;
 
        let mut depth = 1;
 

	
 
        while depth > 0 {
 
            let to_infer_part = &to_infer.parts[to_infer_idx];
 
            let template_part = &template[template_idx];
 

	
 
            if to_infer_part == template_part {
 
                let depth_change = to_infer_part.depth_change();
 
                depth += depth_change;
 
                debug_assert_eq!(depth_change, template_part.depth_change());
 
                to_infer_idx += 1;
 
                template_idx += 1;
 
                continue;
 
            }
 
            if to_infer_part.is_marker() { to_infer_idx += 1; continue; }
 
            if template_part.is_marker() { template_idx += 1; continue; }
 

	
 
            // Types are not equal and not markers. So check if we can infer 
 
            // anything
 
            if let Some(depth_change) = Self::infer_part_for_single_type(
 
                to_infer, &mut to_infer_idx, template, &mut template_idx
 
            ) {
 
                depth += depth_change;
 
                modified = true;
 
                continue;
 
            }
 

	
 
            // We cannot infer anything, but the template may still be 
 
            // compatible with the type we're inferring
 
            if let Some(depth_change) = Self::check_part_for_single_type(
 
                template, &mut template_idx, &to_infer.parts, &mut to_infer_idx
 
            ) {
 
                depth += depth_change;
 
                continue;
 
            }
 

	
 
            return SingleInferenceResult::Incompatible
 
        }
 

	
 
        return if modified {
 
            to_infer.recompute_is_done();
 
            SingleInferenceResult::Modified
 
        } else {
 
            SingleInferenceResult::Unmodified
 
        }
 
    }
 

	
 
    /// Checks if both types are compatible, doesn't perform any inference
 
    fn check_subtrees(
 
        type_parts_a: &[InferenceTypePart], start_idx_a: usize,
 
        type_parts_b: &[InferenceTypePart], start_idx_b: usize
 
    ) -> bool {
 
        let mut depth = 1;
 
        let mut idx_a = start_idx_a;
 
        let mut idx_b = start_idx_b;
 

	
 
        while depth > 0 {
 
            let part_a = &type_parts_a[idx_a];
 
            let part_b = &type_parts_b[idx_b];
 

	
 
            if part_a == part_b {
 
                let depth_change = part_a.depth_change();
 
                depth += depth_change;
 
                debug_assert_eq!(depth_change, part_b.depth_change());
 
                idx_a += 1;
 
                idx_b += 1;
 
                continue;
 
            }
 
            
 
            if part_a.is_marker() { idx_a += 1; continue; }
 
            if part_b.is_marker() { idx_b += 1; continue; }
 

	
 
            if let Some(depth_change) = Self::check_part_for_single_type(
 
                type_parts_a, &mut idx_a, type_parts_b, &mut idx_b
 
            ) {
 
                depth += depth_change;
 
                continue;
 
            }
 
            if let Some(depth_change) = Self::check_part_for_single_type(
 
                type_parts_b, &mut idx_b, type_parts_a, &mut idx_a
 
            ) {
 
                depth += depth_change;
 
                continue;
 
            }
 

	
 
            return false;
 
        }
 

	
 
        true
 
    }
 

	
 
    /// Performs the conversion of the inference type into a concrete type.
 
    /// By calling this function you must make sure that no unspecified types
 
    /// (e.g. Unknown or IntegerLike) exist in the type.
 
    fn write_concrete_type(&self, concrete_type: &mut ConcreteType) {
 
        use InferenceTypePart as ITP;
 
        use ConcreteTypePart as CTP;
 

	
 
        // Make sure inference type is specified but concrete type is not yet specified
 
        debug_assert!(!self.parts.is_empty());
 
        debug_assert!(concrete_type.parts.is_empty());
 
        concrete_type.parts.reserve(self.parts.len());
 

	
 
        let mut idx = 0;
 
        while idx < self.parts.len() {
 
            let part = &self.parts[idx];
 
            let converted_part = match part {
 
                ITP::MarkerDefinition(marker) => {
 
                    // Outer markers are converted to regular markers, we
 
                    // completely remove the type subtree that follows it
 
                    idx = InferenceType::find_subtree_end_idx(&self.parts, idx + 1);
 
                    concrete_type.parts.push(CTP::Marker(*marker));
 
                    continue;
 
                },
 
                ITP::MarkerBody(_) => {
 
                    // Inner markers are removed when writing to the concrete
 
                    // type.
 
                    idx += 1;
 
                    continue;
 
                },
 
                ITP::Unknown | ITP::NumberLike | ITP::IntegerLike | ITP::ArrayLike | ITP::PortLike => {
 
                    unreachable!("Attempted to convert inference type part {:?} into concrete type", part);
 
                },
 
                ITP::Void => CTP::Void,
 
                ITP::Message => CTP::Message,
 
                ITP::Bool => CTP::Bool,
 
                ITP::UInt8 => CTP::UInt8,
 
                ITP::UInt16 => CTP::UInt16,
 
                ITP::UInt32 => CTP::UInt32,
 
                ITP::UInt64 => CTP::UInt64,
 
                ITP::SInt8 => CTP::SInt8,
 
                ITP::SInt16 => CTP::SInt16,
 
                ITP::SInt32 => CTP::SInt32,
 
                ITP::SInt64 => CTP::SInt64,
 
                ITP::Character => CTP::Character,
 
                ITP::String => CTP::String,
 
                ITP::Array => CTP::Array,
 
                ITP::Slice => CTP::Slice,
 
                ITP::Input => CTP::Input,
 
                ITP::Output => CTP::Output,
 
                ITP::Instance(id, num) => CTP::Instance(*id, *num),
 
            };
 

	
 
            concrete_type.parts.push(converted_part);
 
            idx += 1;
 
        }
 
    }
 

	
 
    /// Writes a human-readable version of the type to a string. This is used
 
    /// to display error messages
 
    fn write_display_name(
 
        buffer: &mut String, heap: &Heap, parts: &[InferenceTypePart], mut idx: usize
 
    ) -> usize {
 
        use InferenceTypePart as ITP;
 

	
 
        match &parts[idx] {
 
            ITP::MarkerDefinition(thing) => {
 
                buffer.push_str(&format!("{{D:{}}}", *thing));
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
            }, 
 
            ITP::MarkerBody(thing) => {
 
                buffer.push_str(&format!("{{B:{}}}", *thing));
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
            },
 
            ITP::Unknown => buffer.push_str("?"),
 
            ITP::NumberLike => buffer.push_str("numberlike"),
 
            ITP::IntegerLike => buffer.push_str("integerlike"),
 
            ITP::ArrayLike => {
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push_str("[?]");
 
            },
 
            ITP::PortLike => {
 
                buffer.push_str("portlike<");
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push('>');
 
            }
 
            ITP::Void => buffer.push_str("void"),
 
            ITP::Bool => buffer.push_str(KW_TYPE_BOOL_STR),
 
            ITP::UInt8 => buffer.push_str(KW_TYPE_UINT8_STR),
 
            ITP::UInt16 => buffer.push_str(KW_TYPE_UINT16_STR),
 
            ITP::UInt32 => buffer.push_str(KW_TYPE_UINT32_STR),
 
            ITP::UInt64 => buffer.push_str(KW_TYPE_UINT64_STR),
 
            ITP::SInt8 => buffer.push_str(KW_TYPE_SINT8_STR),
 
            ITP::SInt16 => buffer.push_str(KW_TYPE_SINT16_STR),
 
            ITP::SInt32 => buffer.push_str(KW_TYPE_SINT32_STR),
 
            ITP::SInt64 => buffer.push_str(KW_TYPE_SINT64_STR),
 
            ITP::Character => buffer.push_str(KW_TYPE_CHAR_STR),
 
            ITP::String => buffer.push_str(KW_TYPE_STRING_STR),
 
            ITP::Message => {
 
                buffer.push_str(KW_TYPE_MESSAGE_STR);
 
                buffer.push('<');
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push('>');
 
            },
 
            ITP::Array => {
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push_str("[]");
 
            },
 
            ITP::Slice => {
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push_str("[..]");
 
            },
 
            ITP::Input => {
 
                buffer.push_str(KW_TYPE_IN_PORT_STR);
 
                buffer.push('<');
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push('>');
 
            },
 
            ITP::Output => {
 
                buffer.push_str(KW_TYPE_OUT_PORT_STR);
 
                buffer.push('<');
 
                idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                buffer.push('>');
 
            },
 
            ITP::Instance(definition_id, num_sub) => {
 
                let definition = &heap[*definition_id];
 
                buffer.push_str(definition.identifier().value.as_str());
 
                if *num_sub > 0 {
 
                    buffer.push('<');
 
                    idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                    for _sub_idx in 1..*num_sub {
 
                        buffer.push_str(", ");
 
                        idx = Self::write_display_name(buffer, heap, parts, idx + 1);
 
                    }
 
                    buffer.push('>');
 
                }
 
            },
 
        }
 

	
 
        idx
 
    }
 

	
 
    /// Returns the display name of a (part of) the type tree. Will allocate a
 
    /// string.
 
    fn partial_display_name(heap: &Heap, parts: &[InferenceTypePart]) -> String {
 
        let mut buffer = String::with_capacity(parts.len() * 6);
 
        Self::write_display_name(&mut buffer, heap, parts, 0);
 
        buffer
 
    }
 

	
 
    /// Returns the display name of the full type tree. Will allocate a string.
 
    fn display_name(&self, heap: &Heap) -> String {
 
        Self::partial_display_name(heap, &self.parts)
 
    }
 
}
 

	
 
/// Iterator over the subtrees that follow a marker in an `InferenceType`
 
/// instance. Returns immutable slices over the internal parts
 
struct InferenceTypeMarkerIter<'a> {
 
    parts: &'a [InferenceTypePart],
 
    idx: usize,
 
}
 

	
 
impl<'a> InferenceTypeMarkerIter<'a> {
 
    fn new(parts: &'a [InferenceTypePart]) -> Self {
 
        Self{ parts, idx: 0 }
 
    }
 
}
 

	
 
impl<'a> Iterator for InferenceTypeMarkerIter<'a> {
 
    type Item = (usize, &'a [InferenceTypePart]);
 

	
 
    fn next(&mut self) -> Option<Self::Item> {
 
        // Iterate until we find a marker
 
        while self.idx < self.parts.len() {
 
            if let InferenceTypePart::MarkerBody(marker) = self.parts[self.idx] {
 
                // Found a marker, find the subtree end
 
                let start_idx = self.idx + 1;
 
                let end_idx = InferenceType::find_subtree_end_idx(self.parts, start_idx);
 

	
 
                // Modify internal index, then return items
 
                self.idx = end_idx;
 
                return Some((marker, &self.parts[start_idx..end_idx]));
 
            }
 

	
 
            self.idx += 1;
 
        }
 

	
 
        None
 
    }
 
}
 

	
 
#[derive(Debug, PartialEq, Eq)]
 
enum DualInferenceResult {
 
    Neither,        // neither argument is clarified
 
    First,          // first argument is clarified using the second one
 
    Second,         // second argument is clarified using the first one
 
    Both,           // both arguments are clarified
 
    Incompatible,   // types are incompatible: programmer error
 
}
 

	
 
impl DualInferenceResult {
 
    fn modified_lhs(&self) -> bool {
 
        match self {
 
            DualInferenceResult::First | DualInferenceResult::Both => true,
 
            _ => false
 
        }
 
    }
 
    fn modified_rhs(&self) -> bool {
 
        match self {
 
            DualInferenceResult::Second | DualInferenceResult::Both => true,
 
            _ => false
src/protocol/parser/pass_validation_linking.rs
Show inline comments
 
use crate::collections::{ScopedBuffer};
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 
use crate::protocol::parser::symbol_table::*;
 
use crate::protocol::parser::type_table::*;
 

	
 
use super::visitor::{
 
    STMT_BUFFER_INIT_CAPACITY,
 
    EXPR_BUFFER_INIT_CAPACITY,
 
    Ctx, 
 
    Visitor2, 
 
    VisitorResult
 
};
 
use crate::protocol::parser::ModuleCompilationPhase;
 

	
 
#[derive(PartialEq, Eq)]
 
enum DefinitionType {
 
    Primitive(ComponentDefinitionId),
 
    Composite(ComponentDefinitionId),
 
    Function(FunctionDefinitionId)
 
}
 

	
 
impl DefinitionType {
 
    fn is_primitive(&self) -> bool { if let Self::Primitive(_) = self { true } else { false } }
 
    fn is_composite(&self) -> bool { if let Self::Composite(_) = self { true } else { false } }
 
    fn is_function(&self) -> bool { if let Self::Function(_) = self { true } else { false } }
 
    fn definition_id(&self) -> DefinitionId {
 
        match self {
 
            DefinitionType::Primitive(v) => v.upcast(),
 
            DefinitionType::Composite(v) => v.upcast(),
 
            DefinitionType::Function(v) => v.upcast(),
 
        }
 
    }
 
}
 

	
 
/// This particular visitor will go through the entire AST in a recursive manner
 
/// and check if all statements and expressions are legal (e.g. no "return"
 
/// statements in component definitions), and will link certain AST nodes to
 
/// their appropriate targets (e.g. goto statements, or function calls).
 
///
 
/// This visitor will not perform control-flow analysis (e.g. making sure that
 
/// each function actually returns) and will also not perform type checking. So
 
/// the linking of function calls and component instantiations will be checked
 
/// and linked to the appropriate definitions, but the return types and/or
 
/// arguments will not be checked for validity.
 
///
 
///
 
/// Because of this scheme expressions will not be visited in the breadth-first
 
/// pass.
 
pub(crate) struct PassValidationLinking {
 
    /// `in_sync` is `Some(id)` if the visitor is visiting the children of a
 
    /// synchronous statement. A single value is sufficient as nested
 
    /// synchronous statements are not allowed
 
    in_sync: Option<SynchronousStatementId>,
 
    /// `in_while` contains the last encountered `While` statement. This is used
 
    /// to resolve unlabeled `Continue`/`Break` statements.
 
    in_while: Option<WhileStatementId>,
 
    // Traversal state: current scope (which can be used to find the parent
 
    // scope), the definition variant we are considering, and whether the
 
    // visitor is performing breadthwise block statement traversal.
 
    cur_scope: Option<Scope>,
 
    def_type: DefinitionType,
 
    // Parent expression (the previous stmt/expression we visited that could be
 
    // used as an expression parent)
 
    expr_parent: ExpressionParent,
 
    // Keeping track of relative position in block in the breadth-first pass.
 
    // May not correspond to block.statement[index] if any statements are
 
    // inserted after the breadth-pass
 
    relative_pos_in_block: u32,
 
    definition_buffer: ScopedBuffer<DefinitionId>,
 
    // Single buffer of statement IDs that we want to traverse in a block.
 
    // Required to work around Rust borrowing rules and to prevent constant
 
    // cloning of vectors.
 
    statement_buffer: ScopedBuffer<StatementId>,
 
    // Another buffer, now with expression IDs, to prevent constant cloning of
 
    // vectors while working around rust's borrowing rules
 
    expression_buffer: ScopedBuffer<ExpressionId>,
 
}
 

	
 
impl PassValidationLinking {
 
    pub(crate) fn new() -> Self {
 
        Self{
 
            in_sync: None,
 
            in_while: None,
 
            cur_scope: None,
 
            expr_parent: ExpressionParent::None,
 
            def_type: DefinitionType::Function(FunctionDefinitionId::new_invalid()),
 
            relative_pos_in_block: 0,
 
            definition_buffer: ScopedBuffer::new_reserved(128),
 
            statement_buffer: ScopedBuffer::new_reserved(STMT_BUFFER_INIT_CAPACITY),
 
            expression_buffer: ScopedBuffer::new_reserved(EXPR_BUFFER_INIT_CAPACITY),
 
        }
 
    }
 

	
 
    fn reset_state(&mut self) {
 
        self.in_sync = None;
 
        self.in_while = None;
 
        self.cur_scope = None;
 
        self.expr_parent = ExpressionParent::None;
 
        self.def_type = DefinitionType::Function(FunctionDefinitionId::new_invalid());
 
        self.relative_pos_in_block = 0;
 
    }
 
}
 

	
 
impl Visitor2 for PassValidationLinking {
 
    fn visit_module(&mut self, ctx: &mut Ctx) -> VisitorResult {
 
        debug_assert_eq!(ctx.module.phase, ModuleCompilationPhase::TypesAddedToTable);
 

	
 
        let root = &ctx.heap[ctx.module.root_id];
 
        let section = self.definition_buffer.start_section_initialized(&root.definitions);
 
        for definition_idx in 0..section.len() {
 
            let definition_id = section[definition_idx];
 
            self.visit_definition(ctx, definition_id)?;
 
        }
 
        section.forget();
 

	
 
        ctx.module.phase = ModuleCompilationPhase::ValidatedAndLinked;
 
        Ok(())
 
    }
 
    //--------------------------------------------------------------------------
 
    // Definition visitors
 
    //--------------------------------------------------------------------------
 

	
 
    fn visit_component_definition(&mut self, ctx: &mut Ctx, id: ComponentDefinitionId) -> VisitorResult {
 
        self.reset_state();
 

	
 
        self.def_type = match &ctx.heap[id].variant {
 
            ComponentVariant::Primitive => DefinitionType::Primitive(id),
 
            ComponentVariant::Composite => DefinitionType::Composite(id),
 
        };
 
        self.cur_scope = Some(Scope::Definition(id.upcast()));
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        // Visit statements in component body
 
        let body_id = ctx.heap[id].body;
 
        self.visit_block_stmt(ctx, body_id)?;
 
        Ok(())
 
    }
 

	
 
    fn visit_function_definition(&mut self, ctx: &mut Ctx, id: FunctionDefinitionId) -> VisitorResult {
 
        self.reset_state();
 

	
 
        // Set internal statement indices
 
        self.def_type = DefinitionType::Function(id);
 
        self.cur_scope = Some(Scope::Definition(id.upcast()));
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        // Visit statements in function body
 
        let body_id = ctx.heap[id].body;
 
        self.visit_block_stmt(ctx, body_id)?;
 
        Ok(())
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Statement visitors
 
    //--------------------------------------------------------------------------
 

	
 
    fn visit_block_stmt(&mut self, ctx: &mut Ctx, id: BlockStatementId) -> VisitorResult {
 
        self.visit_block_stmt_with_hint(ctx, id, None)
 
    }
 

	
 
    fn visit_local_memory_stmt(&mut self, _ctx: &mut Ctx, _id: MemoryStatementId) -> VisitorResult {
 
        Ok(())
 
    }
 

	
 
    fn visit_local_channel_stmt(&mut self, _ctx: &mut Ctx, _id: ChannelStatementId) -> VisitorResult {
 
        Ok(())
 
    }
 

	
 
    fn visit_labeled_stmt(&mut self, ctx: &mut Ctx, id: LabeledStatementId) -> VisitorResult {
 
        let body_id = ctx.heap[id].body;
 
        self.visit_stmt(ctx, body_id)?;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_if_stmt(&mut self, ctx: &mut Ctx, id: IfStatementId) -> VisitorResult {
 
        // Traverse expression and bodies
 
        let (test_id, true_id, false_id) = {
 
            let stmt = &ctx.heap[id];
 
            (stmt.test, stmt.true_body, stmt.false_body)
 
        };
 

	
 
        debug_assert_eq!(self.expr_parent, ExpressionParent::None);
 
        self.expr_parent = ExpressionParent::If(id);
 
        self.visit_expr(ctx, test_id)?;
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        self.visit_block_stmt(ctx, true_id)?;
 
        if let Some(false_id) = false_id {
 
            self.visit_block_stmt(ctx, false_id)?;
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_while_stmt(&mut self, ctx: &mut Ctx, id: WhileStatementId) -> VisitorResult {
 
        let (test_id, body_id) = {
 
            let stmt = &ctx.heap[id];
 
            (stmt.test, stmt.body)
 
        };
 
        let old_while = self.in_while.replace(id);
 
        debug_assert_eq!(self.expr_parent, ExpressionParent::None);
 
        self.expr_parent = ExpressionParent::While(id);
 
        self.visit_expr(ctx, test_id)?;
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        self.visit_block_stmt(ctx, body_id)?;
 
        self.in_while = old_while;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_break_stmt(&mut self, ctx: &mut Ctx, id: BreakStatementId) -> VisitorResult {
 
        // Resolve break target
 
        let target_end_while = {
 
            let stmt = &ctx.heap[id];
 
            let target_while_id = self.resolve_break_or_continue_target(ctx, stmt.span, &stmt.label)?;
 
            let target_while = &ctx.heap[target_while_id];
 
            debug_assert!(target_while.end_while.is_some());
 
            target_while.end_while.unwrap()
 
        };
 

	
 
        let stmt = &mut ctx.heap[id];
 
        stmt.target = Some(target_end_while);
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_continue_stmt(&mut self, ctx: &mut Ctx, id: ContinueStatementId) -> VisitorResult {
 
        // Resolve continue target
 
        let target_while_id = {
 
            let stmt = &ctx.heap[id];
 
            self.resolve_break_or_continue_target(ctx, stmt.span, &stmt.label)?
 
        };
 

	
 
        let stmt = &mut ctx.heap[id];
 
        stmt.target = Some(target_while_id);
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_synchronous_stmt(&mut self, ctx: &mut Ctx, id: SynchronousStatementId) -> VisitorResult {
 
        // Check for validity of synchronous statement
 
        let cur_sync_span = ctx.heap[id].span;
 
        if self.in_sync.is_some() {
 
            // Nested synchronous statement
 
            let old_sync_span = ctx.heap[self.in_sync.unwrap()].span;
 
            return Err(ParseError::new_error_str_at_span(
 
                &ctx.module.source, cur_sync_span, "Illegal nested synchronous statement"
 
            ).with_info_str_at_span(
 
                &ctx.module.source, old_sync_span, "It is nested in this synchronous statement"
 
            ));
 
        }
 

	
 
        if !self.def_type.is_primitive() {
 
            return Err(ParseError::new_error_str_at_span(
 
                &ctx.module.source, cur_sync_span,
 
                "synchronous statements may only be used in primitive components"
 
            ));
 
        }
 

	
 
        let sync_body = ctx.heap[id].body;
 
        let old = self.in_sync.replace(id);
 
        self.visit_block_stmt_with_hint(ctx, sync_body, Some(id))?;
 
        self.in_sync = old;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_return_stmt(&mut self, ctx: &mut Ctx, id: ReturnStatementId) -> VisitorResult {
 
        // Check if "return" occurs within a function
 
        let stmt = &ctx.heap[id];
 
        if !self.def_type.is_function() {
 
            return Err(ParseError::new_error_str_at_span(
 
                &ctx.module.source, stmt.span,
 
                "return statements may only appear in function bodies"
 
            ));
 
        }
 

	
 
        // If here then we are within a function
 
        debug_assert_eq!(self.expr_parent, ExpressionParent::None);
 
        debug_assert_eq!(ctx.heap[id].expressions.len(), 1);
 
        self.expr_parent = ExpressionParent::Return(id);
 
        self.visit_expr(ctx, ctx.heap[id].expressions[0])?;
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_goto_stmt(&mut self, ctx: &mut Ctx, id: GotoStatementId) -> VisitorResult {
 
        let target_id = self.find_label(ctx, &ctx.heap[id].label)?;
 
        ctx.heap[id].target = Some(target_id);
 

	
 
        let target = &ctx.heap[target_id];
 
        if self.in_sync != target.in_sync {
 
            // We can only goto the current scope or outer scopes. Because
 
            // nested sync statements are not allowed so if the value does
 
            // not match, then we must be inside a sync scope
 
            debug_assert!(self.in_sync.is_some());
 
            let goto_stmt = &ctx.heap[id];
 
            let sync_stmt = &ctx.heap[self.in_sync.unwrap()];
 
            return Err(
 
                ParseError::new_error_str_at_span(&ctx.module.source, goto_stmt.span, "goto may not escape the surrounding synchronous block")
 
                .with_info_str_at_span(&ctx.module.source, target.label.span, "this is the target of the goto statement")
 
                .with_info_str_at_span(&ctx.module.source, sync_stmt.span, "which will jump past this statement")
 
            );
 
        }
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_new_stmt(&mut self, ctx: &mut Ctx, id: NewStatementId) -> VisitorResult {
 
        // Make sure the new statement occurs inside a composite component
 
        if !self.def_type.is_composite() {
 
            let new_stmt = &ctx.heap[id];
 
            return Err(ParseError::new_error_str_at_span(
 
                &ctx.module.source, new_stmt.span,
 
                "instantiating components may only be done in composite components"
 
            ));
 
        }
 

	
 
        // Recurse into call expression (which will check the expression parent
 
        // to ensure that the "new" statment instantiates a component)
 
        let call_expr_id = ctx.heap[id].expression;
 

	
 
        debug_assert_eq!(self.expr_parent, ExpressionParent::None);
 
        self.expr_parent = ExpressionParent::New(id);
 
        self.visit_call_expr(ctx, call_expr_id)?;
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_expr_stmt(&mut self, ctx: &mut Ctx, id: ExpressionStatementId) -> VisitorResult {
 
        let expr_id = ctx.heap[id].expression;
 

	
 
        debug_assert_eq!(self.expr_parent, ExpressionParent::None);
 
        self.expr_parent = ExpressionParent::ExpressionStmt(id);
 
        self.visit_expr(ctx, expr_id)?;
 
        self.expr_parent = ExpressionParent::None;
 

	
 
        Ok(())
 
    }
 

	
 

	
 
    //--------------------------------------------------------------------------
 
    // Expression visitors
 
    //--------------------------------------------------------------------------
 

	
 
    fn visit_assignment_expr(&mut self, ctx: &mut Ctx, id: AssignmentExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        let assignment_expr = &mut ctx.heap[id];
 

	
 
        let left_expr_id = assignment_expr.left;
 
        let right_expr_id = assignment_expr.right;
 
        let old_expr_parent = self.expr_parent;
 
        assignment_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 0);
 
        self.visit_expr(ctx, left_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 1);
 
        self.visit_expr(ctx, right_expr_id)?;
 
        self.expr_parent = old_expr_parent;
 
        Ok(())
 
    }
 

	
 
    fn visit_conditional_expr(&mut self, ctx: &mut Ctx, id: ConditionalExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        let conditional_expr = &mut ctx.heap[id];
 

	
 
        let test_expr_id = conditional_expr.test;
 
        let true_expr_id = conditional_expr.true_expression;
 
        let false_expr_id = conditional_expr.false_expression;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        conditional_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 0);
 
        self.visit_expr(ctx, test_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 1);
 
        self.visit_expr(ctx, true_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 2);
 
        self.visit_expr(ctx, false_expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_binary_expr(&mut self, ctx: &mut Ctx, id: BinaryExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        let binary_expr = &mut ctx.heap[id];
 
        let left_expr_id = binary_expr.left;
 
        let right_expr_id = binary_expr.right;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        binary_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 0);
 
        self.visit_expr(ctx, left_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 1);
 
        self.visit_expr(ctx, right_expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_unary_expr(&mut self, ctx: &mut Ctx, id: UnaryExpressionId) -> VisitorResult {
 
        let unary_expr = &mut ctx.heap[id];
 
        let expr_id = unary_expr.expression;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        unary_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(id.upcast(), 0);
 
        self.visit_expr(ctx, expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_indexing_expr(&mut self, ctx: &mut Ctx, id: IndexingExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        let indexing_expr = &mut ctx.heap[id];
 

	
 
        let subject_expr_id = indexing_expr.subject;
 
        let index_expr_id = indexing_expr.index;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        indexing_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 0);
 
        self.visit_expr(ctx, subject_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 1);
 
        self.visit_expr(ctx, index_expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_slicing_expr(&mut self, ctx: &mut Ctx, id: SlicingExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        let slicing_expr = &mut ctx.heap[id];
 

	
 
        let subject_expr_id = slicing_expr.subject;
 
        let from_expr_id = slicing_expr.from_index;
 
        let to_expr_id = slicing_expr.to_index;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        slicing_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 0);
 
        self.visit_expr(ctx, subject_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 1);
 
        self.visit_expr(ctx, from_expr_id)?;
 
        self.expr_parent = ExpressionParent::Expression(upcast_id, 2);
 
        self.visit_expr(ctx, to_expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_select_expr(&mut self, ctx: &mut Ctx, id: SelectExpressionId) -> VisitorResult {
 
        let select_expr = &mut ctx.heap[id];
 
        let expr_id = select_expr.subject;
 

	
 
        let old_expr_parent = self.expr_parent;
 
        select_expr.parent = old_expr_parent;
 

	
 
        self.expr_parent = ExpressionParent::Expression(id.upcast(), 0);
 
        self.visit_expr(ctx, expr_id)?;
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_literal_expr(&mut self, ctx: &mut Ctx, id: LiteralExpressionId) -> VisitorResult {
 
        let literal_expr = &mut ctx.heap[id];
 
        let old_expr_parent = self.expr_parent;
 
        literal_expr.parent = old_expr_parent;
 

	
 
        match &mut literal_expr.value {
 
            Literal::Null | Literal::True | Literal::False |
 
            Literal::Character(_) | Literal::String(_) | Literal::Integer(_) => {
 
                // Just the parent has to be set, done above
 
            },
 
            Literal::Struct(literal) => {
 
                let upcast_id = id.upcast();
 
                // Retrieve type definition
 
                let type_definition = ctx.types.get_base_definition(&literal.definition).unwrap();
 
                let struct_definition = type_definition.definition.as_struct();
 

	
 
                // Make sure all fields are specified, none are specified twice
 
                // and all fields exist on the struct definition
 
                let mut specified = Vec::new(); // TODO: @performance
 
                specified.resize(struct_definition.fields.len(), false);
 

	
 
                for field in &mut literal.fields {
 
                    // Find field in the struct definition
 
                    let field_idx = struct_definition.fields.iter().position(|v| v.identifier == field.identifier);
 
                    if field_idx.is_none() {
 
                        let field_span = field.identifier.span;
 
                        let literal = ctx.heap[id].value.as_struct();
 
                        let ast_definition = &ctx.heap[literal.definition];
 
                        return Err(ParseError::new_error_at_span(
 
                            &ctx.module.source, field_span, format!(
 
                                "This field does not exist on the struct '{}'",
 
                                ast_definition.identifier().value.as_str()
 
                            )
 
                        ));
 
                    }
 
                    field.field_idx = field_idx.unwrap();
 

	
 
                    // Check if specified more than once
 
                    if specified[field.field_idx] {
 
                        return Err(ParseError::new_error_str_at_span(
 
                            &ctx.module.source, field.identifier.span,
 
                            "This field is specified more than once"
 
                        ));
 
                    }
 

	
 
                    specified[field.field_idx] = true;
 
                }
 

	
 
                if !specified.iter().all(|v| *v) {
 
                    // Some fields were not specified
 
                    let mut not_specified = String::new();
 
                    let mut num_not_specified = 0;
 
                    for (def_field_idx, is_specified) in specified.iter().enumerate() {
 
                        if !is_specified {
 
                            if !not_specified.is_empty() { not_specified.push_str(", ") }
 
                            let field_ident = &struct_definition.fields[def_field_idx].identifier;
 
                            not_specified.push_str(field_ident.value.as_str());
 
                            num_not_specified += 1;
 
                        }
 
                    }
 

	
 
                    debug_assert!(num_not_specified > 0);
 
                    let msg = if num_not_specified == 1 {
 
                        format!("not all fields are specified, '{}' is missing", not_specified)
 
                    } else {
 
                        format!("not all fields are specified, [{}] are missing", not_specified)
 
                    };
 
                    return Err(ParseError::new_error_at_span(
 
                        &ctx.module.source, literal.parser_type.elements[0].full_span, msg
 
                    ));
 
                }
 

	
 
                // Need to traverse fields expressions in struct and evaluate
 
                // the poly args
 
                let mut expr_section = self.expression_buffer.start_section();
 
                for field in &literal.fields {
 
                    expr_section.push(field.value);
 
                }
 

	
 
                for expr_idx in 0..expr_section.len() {
 
                    let expr_id = expr_section[expr_idx];
 
                    self.expr_parent = ExpressionParent::Expression(upcast_id, expr_idx as u32);
 
                    self.visit_expr(ctx, expr_id)?;
 
                }
 

	
 
                expr_section.forget();
 
            },
 
            Literal::Enum(literal) => {
 
                // Make sure the variant exists
 
                let type_definition = ctx.types.get_base_definition(&literal.definition).unwrap();
 
                let enum_definition = type_definition.definition.as_enum();
 

	
 
                let variant_idx = enum_definition.variants.iter().position(|v| {
 
                    v.identifier == literal.variant
 
                });
 

	
 
                if variant_idx.is_none() {
 
                    let literal = ctx.heap[id].value.as_enum();
 
                    let ast_definition = ctx.heap[literal.definition].as_enum();
 
                    return Err(ParseError::new_error_at_span(
 
                        &ctx.module.source, literal.parser_type.elements[0].full_span, format!(
 
                            "the variant '{}' does not exist on the enum '{}'",
 
                            literal.variant.value.as_str(), ast_definition.identifier.value.as_str()
 
                        )
 
                    ));
 
                }
 

	
 
                literal.variant_idx = variant_idx.unwrap();
 
            },
 
            Literal::Union(literal) => {
 
                // Make sure the variant exists
 
                let type_definition = ctx.types.get_base_definition(&literal.definition).unwrap();
 
                let union_definition = type_definition.definition.as_union();
 

	
 
                let variant_idx = union_definition.variants.iter().position(|v| {
 
                    v.identifier == literal.variant
 
                });
 
                if variant_idx.is_none() {
 
                    let literal = ctx.heap[id].value.as_union();
 
                    let ast_definition = ctx.heap[literal.definition].as_union();
 
                    return Err(ParseError::new_error_at_span(
 
                        &ctx.module.source, literal.parser_type.elements[0].full_span, format!(
 
                            "the variant does '{}' does not exist on the union '{}'",
 
                            literal.variant.value.as_str(), ast_definition.identifier.value.as_str()
 
                        )
 
                    ));
 
                }
 

	
 
                literal.variant_idx = variant_idx.unwrap();
 

	
 
                // Make sure the number of specified values matches the expected
 
                // number of embedded values in the union variant.
 
                let union_variant = &union_definition.variants[literal.variant_idx];
 
                if union_variant.embedded.len() != literal.values.len() {
 
                    let literal = ctx.heap[id].value.as_union();
 
                    let ast_definition = ctx.heap[literal.definition].as_union();
 
                    return Err(ParseError::new_error_at_span(
 
                        &ctx.module.source, literal.parser_type.elements[0].full_span, format!(
 
                            "The variant '{}' of union '{}' expects {} embedded values, but {} were specified",
 
                            literal.variant.value.as_str(), ast_definition.identifier.value.as_str(),
 
                            union_variant.embedded.len(), literal.values.len()
 
                        ),
 
                    ))
 
                }
 

	
 
                // Traverse embedded values of union (if any) and evaluate the
 
                // polymorphic arguments
 
                let upcast_id = id.upcast();
 
                let mut expr_section = self.expression_buffer.start_section();
 
                for value in &literal.values {
 
                    expr_section.push(*value);
 
                }
 

	
 
                for expr_idx in 0..expr_section.len() {
 
                    let expr_id = expr_section[expr_idx];
 
                    self.expr_parent = ExpressionParent::Expression(upcast_id, expr_idx as u32);
 
                    self.visit_expr(ctx, expr_id)?;
 
                }
 

	
 
                expr_section.forget();
 
            },
 
            Literal::Array(literal) => {
 
                // Visit all expressions in the array
 
                let upcast_id = id.upcast();
 
                let expr_section = self.expression_buffer.start_section_initialized(literal);
 
                for expr_idx in 0..expr_section.len() {
 
                    let expr_id = expr_section[expr_idx];
 
                    self.expr_parent = ExpressionParent::Expression(upcast_id, expr_idx as u32);
 
                    self.visit_expr(ctx, expr_id)?;
 
                }
 

	
 
                expr_section.forget();
 
            }
 
        }
 

	
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_call_expr(&mut self, ctx: &mut Ctx, id: CallExpressionId) -> VisitorResult {
 
        let call_expr = &mut ctx.heap[id];
 

	
 
        // Check whether the method is allowed to be called within the code's
 
        // context (in sync, definition type, etc.)
 
        let mut expected_wrapping_new_stmt = false;
 
        match &mut call_expr.method {
 
            Method::Get => {
 
                if !self.def_type.is_primitive() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'get' may only occur in primitive component definitions"
 
                    ));
 
                }
 
                if self.in_sync.is_none() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'get' may only occur inside synchronous blocks"
 
                    ));
 
                }
 
            },
 
            Method::Put => {
 
                if !self.def_type.is_primitive() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'put' may only occur in primitive component definitions"
 
                    ));
 
                }
 
                if self.in_sync.is_none() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'put' may only occur inside synchronous blocks"
 
                    ));
 
                }
 
            },
 
            Method::Fires => {
 
                if !self.def_type.is_primitive() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'fires' may only occur in primitive component definitions"
 
                    ));
 
                }
 
                if self.in_sync.is_none() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "a call to 'fires' may only occur inside synchronous blocks"
 
                    ));
 
                }
 
            },
 
            Method::Create => {},
 
            Method::Length => {},
 
            Method::Assert => {
 
                if self.def_type.is_function() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "assert statement may only occur in components"
 
                    ));
 
                }
 
                if self.in_sync.is_none() {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        &ctx.module.source, call_expr.span,
 
                        "assert statements may only occur inside synchronous blocks"
 
                    ));
 
                }
 
            },
 
            Method::UserFunction => {},
 
            Method::UserComponent => {
 
                expected_wrapping_new_stmt = true;
 
            },
 
        }
 

	
 
        if expected_wrapping_new_stmt {
 
            if !self.expr_parent.is_new() {
 
                return Err(ParseError::new_error_str_at_span(
 
                    &ctx.module.source, call_expr.span,
 
                    "cannot call a component, it can only be instantiated by using 'new'"
 
                ));
 
            }
 
        } else {
 
            if self.expr_parent.is_new() {
 
                return Err(ParseError::new_error_str_at_span(
 
                    &ctx.module.source, call_expr.span,
 
                    "only components can be instantiated, this is a function"
 
                ));
 
            }
 
        }
 

	
 
        // Check the number of arguments
 
        let call_definition = ctx.types.get_base_definition(&call_expr.definition).unwrap();
 
        let num_expected_args = match &call_definition.definition {
 
            DefinedTypeVariant::Function(definition) => definition.arguments.len(),
 
            DefinedTypeVariant::Component(definition) => definition.arguments.len(),
 
            v => unreachable!("encountered {} type in call expression", v.type_class()),
 
        };
 

	
 
        let num_provided_args = call_expr.arguments.len();
 
        if num_provided_args != num_expected_args {
 
            let argument_text = if num_expected_args == 1 { "argument" } else { "arguments" };
 
            return Err(ParseError::new_error_at_span(
 
                &ctx.module.source, call_expr.span, format!(
 
                    "expected {} {}, but {} were provided",
 
                    num_expected_args, argument_text, num_provided_args
 
                )
 
            ));
 
        }
 

	
 
        // Recurse into all of the arguments and set the expression's parent
 
        let upcast_id = id.upcast();
 

	
 
        let section = self.expression_buffer.start_section_initialized(&call_expr.arguments);
 
        let old_expr_parent = self.expr_parent;
 
        call_expr.parent = old_expr_parent;
 

	
 
        for arg_expr_idx in 0..section.len() {
 
            let arg_expr_id = section[arg_expr_idx];
 
            self.expr_parent = ExpressionParent::Expression(upcast_id, arg_expr_idx as u32);
 
            self.visit_expr(ctx, arg_expr_id)?;
 
        }
 

	
 
        section.forget();
 
        self.expr_parent = old_expr_parent;
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_variable_expr(&mut self, ctx: &mut Ctx, id: VariableExpressionId) -> VisitorResult {
 
        let var_expr = &ctx.heap[id];
 
        let variable_id = self.find_variable(ctx, self.relative_pos_in_block, &var_expr.identifier)?;
 
        let var_expr = &mut ctx.heap[id];
 
        var_expr.declaration = Some(variable_id);
 
        var_expr.parent = self.expr_parent;
 

	
 
        Ok(())
 
    }
 
}
 

	
 
impl PassValidationLinking {
 
    //--------------------------------------------------------------------------
 
    // Special traversal
 
    //--------------------------------------------------------------------------
 

	
 
    fn visit_block_stmt_with_hint(&mut self, ctx: &mut Ctx, id: BlockStatementId, hint: Option<SynchronousStatementId>) -> VisitorResult {
 
        // Set parent scope and relative position in the parent scope. Remember
 
        // these values to set them back to the old values when we're done with
 
        // the traversal of the block's statements.
 
        let body = &mut ctx.heap[id];
 
        body.parent_scope = self.cur_scope.clone();
 
        body.relative_pos_in_parent = self.relative_pos_in_block;
 

	
 
        let old_scope = self.cur_scope.replace(match hint {
 
            Some(sync_id) => Scope::Synchronous((sync_id, id)),
 
            None => Scope::Regular(id),
 
        });
 
        let old_relative_pos = self.relative_pos_in_block;
 

	
 
        // Copy statement IDs into buffer
 
        let statement_section = self.statement_buffer.start_section_initialized(&body.statements);
 

	
 
        // Perform the breadth-first pass. Its main purpose is to find labeled
 
        // statements such that we can find the `goto`-targets immediately when
 
        // performing the depth pass
 
        for stmt_idx in 0..statement_section.len() {
 
            self.relative_pos_in_block = stmt_idx as u32;
 
            self.visit_statement_for_locals_labels_and_in_sync(ctx, self.relative_pos_in_block, statement_section[stmt_idx])?;
 
        }
 

	
 
        for stmt_idx in 0..statement_section.len() {
 
            self.relative_pos_in_block = stmt_idx as u32;
 
            self.visit_stmt(ctx, statement_section[stmt_idx])?;
 
        }
 

	
 
        self.cur_scope = old_scope;
 
        self.relative_pos_in_block = old_relative_pos;
 
        statement_section.forget();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_statement_for_locals_labels_and_in_sync(&mut self, ctx: &mut Ctx, relative_pos: u32, id: StatementId) -> VisitorResult {
 
        let statement = &mut ctx.heap[id];
 
        match statement {
 
            Statement::Local(stmt) => {
 
                match stmt {
 
                    LocalStatement::Memory(local) => {
 
                        let variable_id = local.variable;
 
                        self.checked_local_add(ctx, relative_pos, variable_id)?;
 
                    },
 
                    LocalStatement::Channel(local) => {
 
                        let from_id = local.from;
 
                        let to_id = local.to;
 
                        self.checked_local_add(ctx, relative_pos, from_id)?;
 
                        self.checked_local_add(ctx, relative_pos, to_id)?;
 
                    }
 
                }
 
            }
 
            Statement::Labeled(stmt) => {
 
                let stmt_id = stmt.this;
 
                let body_id = stmt.body;
 
                self.checked_label_add(ctx, relative_pos, self.in_sync, stmt_id)?;
 
                self.visit_statement_for_locals_labels_and_in_sync(ctx, relative_pos, body_id)?;
 
            },
 
            Statement::While(stmt) => {
 
                stmt.in_sync = self.in_sync;
 
            },
 
            _ => {},
 
        }
 

	
 
        return Ok(())
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Utilities
 
    //--------------------------------------------------------------------------
 

	
 
    /// Adds a local variable to the current scope. It will also annotate the
 
    /// `Local` in the AST with its relative position in the block.
 
    fn checked_local_add(&mut self, ctx: &mut Ctx, relative_pos: u32, id: LocalId) -> Result<(), ParseError> {
 
        debug_assert!(self.cur_scope.is_some());
 

	
 
        // Make sure we do not conflict with any global symbols
 
        let cur_scope = SymbolScope::Definition(self.def_type.definition_id());
 
        {
 
            let ident = &ctx.heap[id].identifier;
 
            if let Some(symbol) = ctx.symbols.get_symbol_by_name(cur_scope, &ident.value.as_bytes()) {
 
                return Err(ParseError::new_error_str_at_span(
 
                    &ctx.module.source, ident.span,
 
                    "local variable declaration conflicts with symbol"
 
                ).with_info_str_at_span(
 
                    &ctx.module.source, symbol.variant.span_of_introduction(&ctx.heap), "the conflicting symbol is introduced here"
 
                ));
 
            }
src/protocol/parser/token_parsing.rs
Show inline comments
 
use crate::collections::ScopedSection;
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::{
 
    InputSource as InputSource,
 
    InputPosition as InputPosition,
 
    InputSpan,
 
    ParseError,
 
};
 
use super::tokens::*;
 
use super::symbol_table::*;
 
use super::{Module, PassCtx};
 

	
 
// Keywords
 
pub(crate) const KW_LET:       &'static [u8] = b"let";
 
pub(crate) const KW_AS:        &'static [u8] = b"as";
 
pub(crate) const KW_STRUCT:    &'static [u8] = b"struct";
 
pub(crate) const KW_ENUM:      &'static [u8] = b"enum";
 
pub(crate) const KW_UNION:     &'static [u8] = b"union";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"function";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"func";
 
pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
 
pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
 
pub(crate) const KW_IMPORT:    &'static [u8] = b"import";
 

	
 
// Keywords - literals
 
pub(crate) const KW_LIT_TRUE:  &'static [u8] = b"true";
 
pub(crate) const KW_LIT_FALSE: &'static [u8] = b"false";
 
pub(crate) const KW_LIT_NULL:  &'static [u8] = b"null";
 

	
 
// Keywords - functions
 
pub(crate) const KW_FUNC_GET:    &'static [u8] = b"get";
 
pub(crate) const KW_FUNC_PUT:    &'static [u8] = b"put";
 
pub(crate) const KW_FUNC_FIRES:  &'static [u8] = b"fires";
 
pub(crate) const KW_FUNC_CREATE: &'static [u8] = b"create";
 
pub(crate) const KW_FUNC_LENGTH: &'static [u8] = b"length";
 
pub(crate) const KW_FUNC_ASSERT: &'static [u8] = b"assert";
 

	
 
// Keywords - statements
 
pub(crate) const KW_STMT_CHANNEL:  &'static [u8] = b"channel";
 
pub(crate) const KW_STMT_IF:       &'static [u8] = b"if";
 
pub(crate) const KW_STMT_ELSE:     &'static [u8] = b"else";
 
pub(crate) const KW_STMT_WHILE:    &'static [u8] = b"while";
 
pub(crate) const KW_STMT_BREAK:    &'static [u8] = b"break";
 
pub(crate) const KW_STMT_CONTINUE: &'static [u8] = b"continue";
 
pub(crate) const KW_STMT_GOTO:     &'static [u8] = b"goto";
 
pub(crate) const KW_STMT_RETURN:   &'static [u8] = b"return";
 
pub(crate) const KW_STMT_SYNC:     &'static [u8] = b"synchronous";
 
pub(crate) const KW_STMT_NEW:      &'static [u8] = b"new";
 

	
 
// Keywords - types
 
// Since types are needed for returning diagnostic information to the user, the
 
// string variants are put here as well.
 
pub(crate) const KW_TYPE_IN_PORT_STR:  &'static str = "in";
 
pub(crate) const KW_TYPE_OUT_PORT_STR: &'static str = "out";
 
pub(crate) const KW_TYPE_MESSAGE_STR:  &'static str = "msg";
 
pub(crate) const KW_TYPE_BOOL_STR:     &'static str = "bool";
 
pub(crate) const KW_TYPE_UINT8_STR:    &'static str = "u8";
 
pub(crate) const KW_TYPE_UINT16_STR:   &'static str = "u16";
 
pub(crate) const KW_TYPE_UINT32_STR:   &'static str = "u32";
 
pub(crate) const KW_TYPE_UINT64_STR:   &'static str = "u64";
 
pub(crate) const KW_TYPE_SINT8_STR:    &'static str = "s8";
 
pub(crate) const KW_TYPE_SINT16_STR:   &'static str = "s16";
 
pub(crate) const KW_TYPE_SINT32_STR:   &'static str = "s32";
 
pub(crate) const KW_TYPE_SINT64_STR:   &'static str = "s64";
 
pub(crate) const KW_TYPE_CHAR_STR:     &'static str = "char";
 
pub(crate) const KW_TYPE_STRING_STR:   &'static str = "string";
 
pub(crate) const KW_TYPE_INFERRED_STR: &'static str = "auto";
 

	
 
pub(crate) const KW_TYPE_IN_PORT:  &'static [u8] = KW_TYPE_IN_PORT_STR.as_bytes();
 
pub(crate) const KW_TYPE_OUT_PORT: &'static [u8] = KW_TYPE_OUT_PORT_STR.as_bytes();
 
pub(crate) const KW_TYPE_MESSAGE:  &'static [u8] = KW_TYPE_MESSAGE_STR.as_bytes();
 
pub(crate) const KW_TYPE_BOOL:     &'static [u8] = KW_TYPE_BOOL_STR.as_bytes();
 
pub(crate) const KW_TYPE_UINT8:    &'static [u8] = KW_TYPE_UINT8_STR.as_bytes();
 
pub(crate) const KW_TYPE_UINT16:   &'static [u8] = KW_TYPE_UINT16_STR.as_bytes();
 
pub(crate) const KW_TYPE_UINT32:   &'static [u8] = KW_TYPE_UINT32_STR.as_bytes();
 
pub(crate) const KW_TYPE_UINT64:   &'static [u8] = KW_TYPE_UINT64_STR.as_bytes();
 
pub(crate) const KW_TYPE_SINT8:    &'static [u8] = KW_TYPE_SINT8_STR.as_bytes();
 
pub(crate) const KW_TYPE_SINT16:   &'static [u8] = KW_TYPE_SINT16_STR.as_bytes();
 
pub(crate) const KW_TYPE_SINT32:   &'static [u8] = KW_TYPE_SINT32_STR.as_bytes();
 
pub(crate) const KW_TYPE_SINT64:   &'static [u8] = KW_TYPE_SINT64_STR.as_bytes();
 
pub(crate) const KW_TYPE_CHAR:     &'static [u8] = KW_TYPE_CHAR_STR.as_bytes();
 
pub(crate) const KW_TYPE_STRING:   &'static [u8] = KW_TYPE_STRING_STR.as_bytes();
 
pub(crate) const KW_TYPE_INFERRED: &'static [u8] = KW_TYPE_INFERRED_STR.as_bytes();
 

	
 
/// A special trait for when consuming comma-separated things such that we can
 
/// push them onto a `Vec` and onto a `ScopedSection`. As we monomorph for
 
/// very specific comma-separated cases I don't expect polymorph bloat.
 
/// Also, I really don't like this solution.
 
pub(crate) trait Extendable {
 
    type Value;
 

	
 
    fn push(&mut self, v: Self::Value);
 
}
 

	
 
impl<T> Extendable for Vec<T> {
 
    type Value = T;
 

	
 
    #[inline]
 
    fn push(&mut self, v: Self::Value) {
 
        (self as &mut Vec<T>).push(v);
 
    }
 
}
 

	
 
impl<T: Sized> Extendable for ScopedSection<T> {
 
    type Value = T;
 

	
 
    #[inline]
 
    fn push(&mut self, v: Self::Value) {
 
        (self as &mut ScopedSection<T>).push(v);
 
    }
 
}
 

	
 
/// Consumes a domain-name identifier: identifiers separated by a dot. For
 
/// simplification of later parsing and span identification the domain-name may
 
/// contain whitespace, but must reside on the same line.
 
pub(crate) fn consume_domain_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (_, mut span) = consume_ident(source, iter)?;
 
    while let Some(TokenKind::Dot) = iter.next() {
 
        iter.consume();
 
        let (_, new_span) = consume_ident(source, iter)?;
 
        span.end = new_span.end;
 
    }
 

	
 
    // Not strictly necessary, but probably a reasonable restriction: this
 
    // simplifies parsing of module naming and imports.
 
    if span.begin.line != span.end.line {
 
        return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines"));
 
    }
 

	
 
    // If module name consists of a single identifier, then it may not match any
 
    // of the reserved keywords
 
    let section = source.section_at_pos(span.begin, span.end);
 
    if is_reserved_keyword(section) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((source.section_at_pos(span.begin, span.end), span))
 
}
 

	
 
/// Consumes a specific expected token. Be careful to only call this with tokens
 
/// that do not have a variable length.
 
pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<InputSpan, ParseError> {
 
    if Some(expected) != iter.next() {
 
        return Err(ParseError::new_error_at_pos(
 
            source, iter.last_valid_pos(),
 
            format!("expected '{}'", expected.token_chars())
 
        ));
 
    }
 
    let span = iter.next_span();
 
    iter.consume();
 
    Ok(span)
 
}
 

	
 
/// Consumes a comma separated list until the closing delimiter is encountered
 
pub(crate) fn consume_comma_separated_until<T, F, E>(
 
    close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
 
    mut consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
 
    close_pos: Option<&mut InputPosition>
 
) -> Result<(), ParseError>
 
    where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
 
          E: Extendable<Value=T>
 
{
 
    let mut had_comma = true;
 
    let mut next;
 
    loop {
 
        next = iter.next();
 
        if Some(close_delim) == next {
 
            if let Some(close_pos) = close_pos {
 
                // If requested return the position of the closing delimiter
 
                let (_, new_close_pos) = iter.next_positions();
 
                *close_pos = new_close_pos;
 
            }
 
            iter.consume();
 
            break;
 
        } else if !had_comma || next.is_none() {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        let new_item = consumer_fn(source, iter, ctx)?;
 
        target.push(new_item);
 

	
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(())
 
}
 

	
 
/// Consumes a comma-separated list of items if the opening delimiting token is
 
/// encountered. If not, then the iterator will remain at its current position.
 
/// Note that the potential cases may be:
 
/// - No opening delimiter encountered, then we return `false`.
 
/// - Both opening and closing delimiter encountered, but no items.
 
/// - Opening and closing delimiter encountered, and items were processed.
 
/// - Found an opening delimiter, but processing an item failed.
 
pub(crate) fn maybe_consume_comma_separated<T, F, E>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
 
    consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
 
    close_pos: Option<&mut InputPosition>
 
) -> Result<bool, ParseError>
 
    where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
 
          E: Extendable<Value=T>
 
{
 
    if Some(open_delim) != iter.next() {
 
        return Ok(false);
 
    }
 

	
 
    // Opening delimiter encountered, so must parse the comma-separated list.
 
    iter.consume();
 
    consume_comma_separated_until(close_delim, source, iter, ctx, consumer_fn, target, item_name_and_article, close_pos)?;
 

	
 
    Ok(true)
 
}
 

	
 
pub(crate) fn maybe_consume_comma_separated_spilled<F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<(), ParseError>>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
 
    iter: &mut TokenIter, ctx: &mut PassCtx,
 
    mut consumer_fn: F, item_name_and_article: &'static str
 
) -> Result<bool, ParseError> {
 
    let mut next = iter.next();
 
    if Some(open_delim) != next {
 
        return Ok(false);
 
    }
 

	
 
    iter.consume();
 
    let mut had_comma = true;
 
    loop {
 
        next = iter.next();
 
        if Some(close_delim) == next {
 
            iter.consume();
 
            break;
 
        } else if !had_comma {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        consumer_fn(source, iter, ctx)?;
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(true)
 
}
 

	
 
/// Consumes a comma-separated list and expected the opening and closing
 
/// characters to be present. The returned array may still be empty
 
pub(crate) fn consume_comma_separated<T, F, E>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
 
    iter: &mut TokenIter, ctx: &mut PassCtx,
 
    consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
 
    list_name_and_article: &'static str, close_pos: Option<&mut InputPosition>
 
) -> Result<(), ParseError>
 
    where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
 
          E: Extendable<Value=T>
 
{
 
    let first_pos = iter.last_valid_pos();
 
    match maybe_consume_comma_separated(
 
        open_delim, close_delim, source, iter, ctx, consumer_fn, target,
 
        item_name_and_article, close_pos
 
    ) {
 
        Ok(true) => Ok(()),
 
        Ok(false) => {
 
            return Err(ParseError::new_error_at_pos(
 
                source, first_pos,
 
                format!("expected {}", list_name_and_article)
 
            ));
 
        },
 
        Err(err) => Err(err)
 
    }
 
}
 

	
 
/// Consumes an integer literal, may be binary, octal, hexadecimal or decimal,
 
/// and may have separating '_'-characters.
 
pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
 
    if Some(TokenKind::Integer) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
 
    }
 
    let integer_span = iter.next_span();
 
    iter.consume();
 

	
 
    let integer_text = source.section_at_span(integer_span);
 

	
 
    // Determine radix and offset from prefix
 
    let (radix, input_offset, radix_name) =
 
        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
 
            // Binary number
 
            (2, 2, "binary")
 
        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
 
            // Octal number
 
            (8, 2, "octal")
 
        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
 
            // Hexadecimal number
 
            (16, 2, "hexadecimal")
 
        } else {
 
            (10, 0, "decimal")
 
        };
 

	
 
    // Take out any of the separating '_' characters
 
    buffer.clear();
 
    for char_idx in input_offset..integer_text.len() {
 
        let char = integer_text[char_idx];
 
        if char == b'_' {
 
            continue;
 
        }
 
        if !char.is_ascii_digit() {
 
            return Err(ParseError::new_error_at_span(
 
                source, integer_span,
 
                format!("incorrectly formatted {} number", radix_name)
 
            ));
 
        }
 
        buffer.push(char::from(char));
 
    }
 

	
 
    // Use the cleaned up string to convert to integer
 
    match u64::from_str_radix(&buffer, radix) {
 
        Ok(number) => Ok((number, integer_span)),
 
        Err(_) => Err(ParseError::new_error_at_span(
 
            source, integer_span,
 
            format!("incorrectly formatted {} number", radix_name)
 
        )),
 
    }
 
}
 

	
 
/// Consumes a character literal. We currently support a limited number of
 
/// backslash-escaped characters
 
pub(crate) fn consume_character_literal(
 
    source: &InputSource, iter: &mut TokenIter
 
) -> Result<(char, InputSpan), ParseError> {
 
    if Some(TokenKind::Character) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal"));
 
    }
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let char_text = source.section_at_span(span);
 
    if !char_text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(
 
            source, span, "expected an ASCII character literal"
 
        ));
 
    }
 

	
 
    match char_text.len() {
 
        0 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 
        1 => {
 
            // We already know the text is ascii, so just throw an error if we have the escape
 
            // character.
 
            if char_text[0] == b'\\' {
 
                return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
 
            }
 
            return Ok((char_text[0] as char, span));
 
        },
 
        2 => {
 
            if char_text[0] == b'\\' {
 
                let result = parse_escaped_character(source, iter.last_valid_pos(), char_text[1])?;
 
                return Ok((result, span))
 
            }
 
        },
 
        _ => {}
 
    }
 

	
 
    return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
 
}
 

	
 
/// Consumes a string literal. We currently support a limited number of
 
/// backslash-escaped characters. Note that the result is stored in the
 
/// buffer.
 
pub(crate) fn consume_string_literal(
 
    source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 
) -> Result<InputSpan, ParseError> {
 
    if Some(TokenKind::String) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
 
    }
 

	
 
    buffer.clear();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let text = source.section_at_span(span);
 
    if !text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
 
    }
 
    buffer.reserve(text.len());
 

	
 
    let mut was_escape = false;
 
    for idx in 0..text.len() {
 
        let cur = text[idx];
 
        if cur != b'\\' {
 
            if was_escape {
 
                let to_push = parse_escaped_character(source, iter.last_valid_pos(), cur)?;
 
                buffer.push(to_push);
 
            } else {
 
                buffer.push(cur as char);
 
            }
 
            was_escape = false;
 
        } else {
 
            was_escape = true;
 
        }
 
    }
 

	
 
    debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal
 

	
 
    Ok(span)
 
}
 

	
 
fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> Result<char, ParseError> {
 
    let result = match v {
 
        b'r' => '\r',
 
        b'n' => '\n',
 
        b't' => '\t',
 
        b'0' => '\0',
 
        b'\\' => '\\',
 
        b'\'' => '\'',
 
        b'"' => '"',
 
        v => return Err(ParseError::new_error_at_pos(
 
            source, pos, format!("unexpected escaped character '{}'", v)
 
        )),
 
    };
 
    Ok(result)
 
}
 

	
 
pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    peek_ident(source, iter).map_or(false, |section| section == expected)
 
}
 

	
 
pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_positions();
 
        return Some(source.section_at_pos(start, end))
 
    }
 

	
 
    None
 
}
 

	
 
/// Consumes any identifier and returns it together with its span. Does not
 
/// check if the identifier is a reserved keyword.
 
pub(crate) fn consume_any_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
 
}
 

	
 
/// Consumes a specific identifier. May or may not be a reserved keyword.
 
pub(crate) fn consume_exact_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> Result<InputSpan, ParseError> {
 
    let (ident, pos) = consume_any_ident(source, iter)?;
 
    if ident != expected {
 
        debug_assert!(expected.is_ascii());
 
        return Err(ParseError::new_error_at_pos(
 
            source, iter.last_valid_pos(),
 
            format!("expected the text '{}'", &String::from_utf8_lossy(expected))
 
        ));
 
    }
 
    Ok(pos)
 
}
 

	
 
/// Consumes an identifier that is not a reserved keyword and returns it
 
/// together with its span.
 
pub(crate) fn consume_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (ident, span) = consume_any_ident(source, iter)?;
 
    if is_reserved_keyword(ident) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((ident, span))
 
}
 

	
 
/// Consumes an identifier and immediately intern it into the `StringPool`
 
pub(crate) fn consume_ident_interned(
 
    source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx
 
) -> Result<Identifier, ParseError> {
 
    let (value, span) = consume_ident(source, iter)?;
 
    let value = ctx.pool.intern(value);
 
    Ok(Identifier{ span, value })
 
}
 

	
 
fn is_reserved_definition_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_STRUCT | KW_ENUM | KW_UNION | KW_FUNCTION | KW_PRIMITIVE | KW_COMPOSITE => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_statement_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_IMPORT | KW_AS |
 
        KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
 
        KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
 
        KW_STMT_SYNC | KW_STMT_NEW => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_expression_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_LET |
 
        KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
 
        KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_type_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
 
        KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
 
        KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
 
        KW_TYPE_CHAR | KW_TYPE_STRING |
 
        KW_TYPE_INFERRED => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_keyword(text: &[u8]) -> bool {
 
    return
 
        is_reserved_definition_keyword(text) ||
 
        is_reserved_statement_keyword(text) ||
 
        is_reserved_expression_keyword(text) ||
 
        is_reserved_type_keyword(text);
 
}
 

	
 
pub(crate) fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
 
    for module in modules {
 
        if module.root_id == root_id {
 
            return Some(module)
 
        }
 
    }
 

	
 
    return None
 
}
 

	
 
/// Constructs a human-readable message indicating why there is a conflict of
 
/// symbols.
 
// Note: passing the `module_idx` is not strictly necessary, but will prevent
 
// programmer mistakes during development: we get a conflict because we're
 
// currently parsing a particular module.
 
pub(crate) fn construct_symbol_conflict_error(
 
    modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol
 
) -> ParseError {
 
    let module = &modules[module_idx];
 
    let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, Option<InputSpan>) {
 
        match &symbol.variant {
 
            SymbolVariant::Module(module) => {
 
                let import = &ctx.heap[module.introduced_at];
 
                return (
 
                    format!("the module aliased as '{}' imported here", symbol.name.as_str()),
 
                    Some(import.as_module().span)
 
                );
 
            },
 
            SymbolVariant::Definition(definition) => {
 
                if definition.defined_in_module.is_invalid() {
 
                    // Must be a builtin thing
 
                    return (format!("the builtin '{}'", symbol.name.as_str()), None)
 
                } else {
 
                    if let Some(import_id) = definition.imported_at {
 
                        let import = &ctx.heap[import_id];
 
                        return (
 
                            format!("the type '{}' imported here", symbol.name.as_str()),
 
                            Some(import.as_symbols().span)
 
                        );
 
                    } else {
 
                        // This is a defined symbol. So this must mean that the
 
                        // error was caused by it being defined.
 
                        debug_assert_eq!(definition.defined_in_module, module.root_id);
 

	
 
                        return (
 
                            format!("the type '{}' defined here", symbol.name.as_str()),
 
                            Some(definition.identifier_span)
 
                        )
 
                    }
 
                }
 
            }
 
        }
 
    };
 

	
 
    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
 
    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
 
    let new_symbol_span = new_symbol_span.unwrap(); // because new symbols cannot be builtin
 

	
 
    match old_symbol_span {
 
        Some(old_symbol_span) => ParseError::new_error_at_span(
 
            &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
 
        ).with_info_at_span(
 
            &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
 
        ),
 
        None => ParseError::new_error_at_span(
 
            &module.source, new_symbol_span,
 
            format!("symbol is defined twice: {} conflicts with {}", new_symbol_msg, old_symbol_msg)
 
        )
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/tokens.rs
Show inline comments
 
use crate::protocol::input_source::{
 
    InputPosition as InputPosition,
 
    InputSpan
 
};
 

	
 
/// Represents a particular kind of token. Some tokens represent
 
/// variable-character tokens. Such a token is always followed by a
 
/// `TokenKind::SpanEnd` token.
 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum TokenKind {
 
    // Variable-character tokens, followed by a SpanEnd token
 
    Ident,          // regular identifier
 
    Pragma,         // identifier with prefixed `#`, range includes `#`
 
    Integer,        // integer literal
 
    String,         // string literal, range includes `"`
 
    Character,      // character literal, range includes `'`
 
    LineComment,    // line comment, range includes leading `//`, but not newline
 
    BlockComment,   // block comment, range includes leading `/*` and trailing `*/`
 
    // Punctuation (single character)
 
    Exclamation,    // !
 
    Question,       // ?
 
    Pound,          // #
 
    OpenAngle,      // <
 
    OpenCurly,      // {
 
    OpenParen,      // (
 
    OpenSquare,     // [
 
    CloseAngle,     // >
 
    CloseCurly,     // }
 
    CloseParen,     // )
 
    CloseSquare,    // ]
 
    Colon,          // :
 
    Comma,          // ,
 
    Dot,            // .
 
    SemiColon,      // ;
 
    // Operator-like (single character)
 
    At,             // @
 
    Plus,           // +
 
    Minus,          // -
 
    Star,           // *
 
    Slash,          // /
 
    Percent,        // %
 
    Caret,          // ^
 
    And,            // &
 
    Or,             // |
 
    Tilde,          // ~
 
    Equal,          // =
 
    // Punctuation (two characters)
 
    ColonColon,     // ::
 
    DotDot,         // ..
 
    ArrowRight,     // ->
 
    // Operator-like (two characters)
 
    PlusPlus,       // ++
 
    PlusEquals,     // +=
 
    MinusMinus,     // --
 
    MinusEquals,    // -=
 
    StarEquals,     // *=
 
    SlashEquals,    // /=
 
    PercentEquals,  // %=
 
    CaretEquals,    // ^=
 
    AndAnd,         // &&
 
    AndEquals,      // &=
 
    OrOr,           // ||
 
    OrEquals,       // |=
 
    EqualEqual,     // ==
 
    NotEqual,       // !=
 
    ShiftLeft,      // <<
 
    LessEquals,     // <=
 
    ShiftRight,     // >>
 
    GreaterEquals,  // >=
 
    // Operator-like (three characters)
 
    ShiftLeftEquals,// <<=
 
    ShiftRightEquals, // >>=
 
    // Special marker token to indicate end of variable-character tokens
 
    SpanEnd,
 
}
 

	
 
impl TokenKind {
 
    /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
 
    /// the case for tokens of variable length (e.g. an identifier).
 
    fn has_span_end(&self) -> bool {
 
        return *self <= TokenKind::BlockComment
 
    }
 

	
 
    /// Returns the number of characters associated with the token. May only be called on tokens
 
    /// that do not have a variable length.
 
    fn num_characters(&self) -> u32 {
 
        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
 
        if *self <= TokenKind::Equal {
 
            1
 
        } else if *self <= TokenKind::ShiftRight {
 
            2
 
        } else {
 
            3
 
        }
 
    }
 

	
 
    /// Returns the characters that are represented by the token, may only be called on tokens that
 
    /// do not have a variable length.
 
    pub fn token_chars(&self) -> &'static str {
 
        debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd);
 
        use TokenKind as TK;
 
        match self {
 
            TK::Exclamation => "!",
 
            TK::Question => "?",
 
            TK::Pound => "#",
 
            TK::OpenAngle => "<",
 
            TK::OpenCurly => "{",
 
            TK::OpenParen => "(",
 
            TK::OpenSquare => "[",
 
            TK::CloseAngle => ">",
 
            TK::CloseCurly => "}",
 
            TK::CloseParen => ")",
 
            TK::CloseSquare => "]",
 
            TK::Colon => ":",
 
            TK::Comma => ",",
 
            TK::Dot => ".",
 
            TK::SemiColon => ";",
 
            TK::At => "@",
 
            TK::Plus => "+",
 
            TK::Minus => "-",
 
            TK::Star => "*",
 
            TK::Slash => "/",
 
            TK::Percent => "%",
 
            TK::Caret => "^",
 
            TK::And => "&",
 
            TK::Or => "|",
 
            TK::Tilde => "~",
 
            TK::Equal => "=",
 
            TK::ColonColon => "::",
 
            TK::DotDot => "..",
 
            TK::ArrowRight => "->",
 
            TK::PlusPlus => "++",
 
            TK::PlusEquals => "+=",
 
            TK::MinusMinus => "--",
 
            TK::MinusEquals => "-=",
 
            TK::StarEquals => "*=",
 
            TK::SlashEquals => "/=",
 
            TK::PercentEquals => "%=",
 
            TK::CaretEquals => "^=",
 
            TK::AndAnd => "&&",
 
            TK::AndEquals => "&=",
 
            TK::OrOr => "||",
 
            TK::OrEquals => "|=",
 
            TK::EqualEqual => "==",
 
            TK::NotEqual => "!=",
 
            TK::ShiftLeft => "<<",
 
            TK::LessEquals => "<=",
 
            TK::ShiftRight => ">>",
 
            TK::GreaterEquals => ">=",
 
            TK::ShiftLeftEquals => "<<=",
 
            TK::ShiftRightEquals => ">>=",
 
            // Lets keep these in explicitly for now, in case we want to add more symbols
 
            TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character |
 
            TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(),
 
        }
 
    }
 
}
 

	
 
/// Represents a single token at a particular position.
 
pub struct Token {
 
    pub kind: TokenKind,
 
    pub pos: InputPosition,
 
}
 

	
 
impl Token {
 
    pub(crate) fn new(kind: TokenKind, pos: InputPosition) -> Self {
 
        Self{ kind, pos }
 
    }
 
}
 

	
 
/// The kind of token ranges that are specially parsed by the tokenizer.
 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 
pub enum TokenRangeKind {
 
    Module,
 
    Pragma,
 
    Import,
 
    Definition,
 
    Code,
 
}
 

	
 
pub const NO_RELATION: i32 = -1;
 
pub const NO_SIBLING: i32 = NO_RELATION;
 

	
 
/// A range of tokens with a specific meaning. Such a range is part of a tree
 
/// where each parent tree envelops all of its children.
 
#[derive(Debug)]
 
pub struct TokenRange {
 
    // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
 
    // range kind is Module, in that case the parent index points to itself.
 
    pub parent_idx: usize,
 
    // range kind is Module, in that case the parent index is -1.
 
    pub parent_idx: i32,
 
    pub range_kind: TokenRangeKind,
 
    pub curly_depth: u32,
 
    // Offsets into `TokenBuffer.ranges`: the tokens belonging to this range.
 
    pub start: u32,             // first token (inclusive index)
 
    pub end: u32,               // last token (exclusive index)
 
    // Child ranges
 
    pub num_child_ranges: u32,  // Number of subranges
 
    pub first_child_idx: u32,   // First subrange (or points to self if no subranges)
 
    pub last_child_idx: u32,    // Last subrange (or points to self if no subranges)
 
    pub next_sibling_idx: Option<u32>,
 
    pub first_child_idx: i32,   // First subrange (or -1 if no subranges)
 
    pub last_child_idx: i32,    // Last subrange (or -1 if no subranges)
 
    pub next_sibling_idx: i32,  // Next subrange (or -1 if no next subrange)
 
}
 

	
 
pub struct TokenBuffer {
 
    pub tokens: Vec<Token>,
 
    pub ranges: Vec<TokenRange>,
 
}
 

	
 
impl TokenBuffer {
 
    pub(crate) fn new() -> Self {
 
        Self{ tokens: Vec::new(), ranges: Vec::new() }
 
    }
 

	
 
    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
 
        TokenIter::new(self, range.start as usize, range.end as usize)
 
    }
 

	
 
    pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
 
        self.tokens[range.start as usize].pos
 
    }
 

	
 
    pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition {
 
        let last_token = &self.tokens[range.end as usize - 1];
 
        if last_token.kind == TokenKind::SpanEnd {
 
            return last_token.pos
 
        } else {
 
            debug_assert!(!last_token.kind.has_span_end());
 
            return last_token.pos.with_offset(last_token.kind.num_characters());
 
        }
 
    }
 
}
 

	
 
/// Iterator over tokens within a specific `TokenRange`.
 
pub(crate) struct TokenIter<'a> {
 
    tokens: &'a Vec<Token>,
 
    cur: usize,
 
    end: usize,
 
}
 

	
 
impl<'a> TokenIter<'a> {
 
    fn new(buffer: &'a TokenBuffer, start: usize, end: usize) -> Self {
 
        Self{ tokens: &buffer.tokens, cur: start, end }
 
    }
 

	
 
    /// Returns the next token (may include comments), or `None` if at the end
 
    /// of the range.
 
    pub(crate) fn next_including_comments(&self) -> Option<TokenKind> {
 
        if self.cur >= self.end {
 
            return None;
 
        }
 

	
 
        let token = &self.tokens[self.cur];
 
        Some(token.kind)
 
    }
 

	
 
    /// Returns the next token (but skips over comments), or `None` if at the
 
    /// end of the range
 
    pub(crate) fn next(&mut self) -> Option<TokenKind> {
 
        while let Some(token_kind) = self.next_including_comments() {
 
            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
 
                return Some(token_kind);
 
            }
 
            self.consume();
 
        }
 

	
 
        return None
 
    }
 

	
 
    /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
 
    /// skips over comments
 
    pub(crate) fn peek(&self) -> Option<TokenKind> {
 
        for next_idx in self.cur + 1..self.end {
 
            let next_kind = self.tokens[next_idx].kind;
 
            if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
 
                return Some(next_kind);
 
            }
 
        }
 

	
 
        return None;
 
    }
 

	
 
    /// Returns the start position belonging to the token returned by `next`. If
 
    /// there is not a next token, then we return the end position of the
 
    /// previous token.
 
    pub(crate) fn last_valid_pos(&self) -> InputPosition {
 
        if self.cur < self.end {
 
            // Return token position
 
            return self.tokens[self.cur].pos
 
        }
 

	
 
        // Return previous token end
 
        let token = &self.tokens[self.cur - 1];
 
        return if token.kind == TokenKind::SpanEnd {
 
            token.pos
 
        } else {
 
            token.pos.with_offset(token.kind.num_characters())
 
        };
 
    }
 

	
 
    /// Returns the token range belonging to the token returned by `next`. This
 
    /// assumes that we're not at the end of the range we're iterating over.
 
    /// TODO: @cleanup Phase out?
 
    pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
 
        debug_assert!(self.cur < self.end);
 
        let token = &self.tokens[self.cur];
 
        if token.kind.has_span_end() {
 
            let span_end = &self.tokens[self.cur + 1];
 
            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
 
            (token.pos, span_end.pos)
 
        } else {
 
            let offset = token.kind.num_characters();
 
            (token.pos, token.pos.with_offset(offset))
 
        }
 
    }
 

	
 
    /// See `next_positions`
 
    pub(crate) fn next_span(&self) -> InputSpan {
 
        let (begin, end) = self.next_positions();
 
        return InputSpan::from_positions(begin, end)
 
    }
 

	
 
    /// Advances the iterator to the next (meaningful) token.
 
    pub(crate) fn consume(&mut self) {
 
        if let Some(kind) = self.next() {
 
            if kind.has_span_end() {
 
                self.cur += 2;
 
            } else {
 
                self.cur += 1;
 
            }
 
        }
 
    }
 

	
 
    /// Saves the current iteration position, may be passed to `load` to return
 
    /// the iterator to a previous position.
 
    pub(crate) fn save(&self) -> (usize, usize) {
 
        (self.cur, self.end)
 
    }
 

	
 
    pub(crate) fn load(&mut self, saved: (usize, usize)) {
 
        self.cur = saved.0;
 
        self.end = saved.1;
 
    }
 
}
 
\ No newline at end of file

Changeset was too big and was cut off... Show full diff anyway

0 comments (0 inline, 0 general)