Changeset - 61bb6b04e20b
[Not reviewed]
0 4 1
MH - 4 years ago 2021-12-10 00:04:00
contact@maxhenger.nl
Refactoring ParserType parser in anticipation of tuples
5 files changed with 141 insertions and 4 deletions:
0 comments (0 inline, 0 general)
src/protocol/ast.rs
Show inline comments
 
@@ -327,116 +327,120 @@ pub struct ImportSymbols {
 
    pub this: ImportId,
 
    // Phase 1: parser
 
    pub span: InputSpan,
 
    pub module: Identifier,
 
    pub module_id: RootId,
 
    pub symbols: Vec<AliasedSymbol>,
 
}
 

	
 
#[derive(Debug, Clone)]
 
pub struct Identifier {
 
    pub span: InputSpan,
 
    pub value: StringRef<'static>,
 
}
 

	
 
impl PartialEq for Identifier {
 
    fn eq(&self, other: &Self) -> bool {
 
        return self.value == other.value
 
    }
 
}
 

	
 
impl Display for Identifier {
 
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
 
        write!(f, "{}", self.value.as_str())
 
    }
 
}
 

	
 
#[derive(Debug, Clone, PartialEq, Eq)]
 
pub enum ParserTypeVariant {
 
    // Special builtin, only usable by the compiler and not constructable by the
 
    // programmer
 
    Void,
 
    InputOrOutput,
 
    ArrayLike,
 
    IntegerLike,
 
    // Basic builtin
 
    Message,
 
    Bool,
 
    UInt8, UInt16, UInt32, UInt64,
 
    SInt8, SInt16, SInt32, SInt64,
 
    Character, String,
 
    // Literals (need to get concrete builtin type during typechecking)
 
    IntegerLiteral,
 
    // Marker for inference
 
    Inferred,
 
    // Builtins expecting one subsequent type
 
    Array,
 
    Input,
 
    Output,
 
    // Tuple: expecting any number of elements. Note that the parser type can
 
    // have one-valued tuples, these will be filtered out later during type
 
    // checking.
 
    Tuple(u32), // u32 = number of subsequent types
 
    // User-defined types
 
    PolymorphicArgument(DefinitionId, u32), // u32 = index into polymorphic variables
 
    Definition(DefinitionId, u32), // u32 = number of subsequent types in the type tree.
 
}
 

	
 
impl ParserTypeVariant {
 
    pub(crate) fn num_embedded(&self) -> usize {
 
        use ParserTypeVariant::*;
 

	
 
        match self {
 
            Void | IntegerLike |
 
            Message | Bool |
 
            UInt8 | UInt16 | UInt32 | UInt64 |
 
            SInt8 | SInt16 | SInt32 | SInt64 |
 
            Character | String | IntegerLiteral |
 
            Inferred | PolymorphicArgument(_, _) =>
 
                0,
 
            ArrayLike | InputOrOutput | Array | Input | Output =>
 
                1,
 
            Definition(_, num) => *num as usize,
 
            Definition(_, num) | Tuple(num) => *num as usize,
 
        }
 
    }
 
}
 

	
 
/// ParserTypeElement is an element of the type tree. An element may be
 
/// implicit, meaning that the user didn't specify the type, but it was set by
 
/// the compiler.
 
#[derive(Debug, Clone)]
 
pub struct ParserTypeElement {
 
    pub element_span: InputSpan, // span of this element, not including the child types
 
    pub variant: ParserTypeVariant,
 
}
 

	
 
/// ParserType is a specification of a type during the parsing phase and initial
 
/// linker/validator phase of the compilation process. These types may be
 
/// (partially) inferred or represent literals (e.g. a integer whose bytesize is
 
/// not yet determined).
 
///
 
/// Its contents are the depth-first serialization of the type tree. Each node
 
/// is a type that may accept polymorphic arguments. The polymorphic arguments
 
/// are then the children of the node.
 
#[derive(Debug, Clone)]
 
pub struct ParserType {
 
    pub elements: Vec<ParserTypeElement>,
 
    pub full_span: InputSpan,
 
}
 

	
 
impl ParserType {
 
    pub(crate) fn iter_embedded(&self, parent_idx: usize) -> ParserTypeIter {
 
        ParserTypeIter::new(&self.elements, parent_idx)
 
    }
 
}
 

	
 
/// Iterator over the embedded elements of a specific element.
 
pub struct ParserTypeIter<'a> {
 
    pub elements: &'a [ParserTypeElement],
 
    pub cur_embedded_idx: usize,
 
}
 

	
 
impl<'a> ParserTypeIter<'a> {
 
    fn new(elements: &'a [ParserTypeElement], parent_idx: usize) -> Self {
 
        debug_assert!(parent_idx < elements.len(), "parent index exceeds number of elements in ParserType");
 
        if elements[0].variant.num_embedded() == 0 {
 
            // Parent element does not have any embedded types, place
 
            // `cur_embedded_idx` at end so we will always return `None`
 
            Self{ elements, cur_embedded_idx: elements.len() }
 
        } else {
 
            // Parent element has an embedded type
src/protocol/parser/mod.rs
Show inline comments
 
pub(crate) mod symbol_table;
 
pub(crate) mod type_table;
 
pub(crate) mod tokens;
 
pub(crate) mod token_parsing;
 
pub(crate) mod pass_tokenizer;
 
pub(crate) mod pass_symbols;
 
pub(crate) mod pass_imports;
 
pub(crate) mod pass_definitions;
 
pub(crate) mod pass_definitions_types;
 
pub(crate) mod pass_validation_linking;
 
pub(crate) mod pass_typing;
 
mod visitor;
 

	
 
use tokens::*;
 
use crate::collections::*;
 
use visitor::Visitor;
 
use pass_tokenizer::PassTokenizer;
 
use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use symbol_table::*;
 
use type_table::TypeTable;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_printer::ASTWriter;
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Tokenized,              // source is tokenized
 
    SymbolsScanned,         // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    DefinitionsParsed,      // produced the AST for the entire module
 
    TypesAddedToTable,      // added all definitions to the type table
 
    ValidatedAndLinked,     // AST is traversed and has linked the required AST nodes
 
    // When we continue with the compiler:
 
    // Typed,                  // Type inference and checking has been performed
 
}
 

	
 
pub struct Module {
 
    // Buffers
 
    pub source: InputSource,
 
    pub tokens: TokenBuffer,
 
    // Identifiers
 
    pub root_id: RootId,
 
    pub name: Option<(PragmaId, StringRef<'static>)>,
 
    pub version: Option<(PragmaId, i64)>,
 
    pub phase: ModuleCompilationPhase,
 
}
 

	
 
// TODO: This is kind of wrong. Because when we're producing bytecode we would
 
//       like the bytecode itself to not have the notion of the size of a pointer
 
//       type. But until I figure out what we do want I'll just set everything
 
//       to a 64-bit architecture.
src/protocol/parser/pass_definitions.rs
Show inline comments
 
@@ -415,98 +415,98 @@ impl PassDefinitions {
 
            } else if ident == KW_STMT_BREAK {
 
                let id = self.consume_break_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_CONTINUE {
 
                let id = self.consume_continue_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_SYNC {
 
                let id = self.consume_synchronous_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 

	
 
                let end_sync = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement {
 
                    this,
 
                    start_sync: id,
 
                    next: StatementId::new_invalid()
 
                });
 
                section.push(end_sync.upcast());
 

	
 
                let sync_stmt = &mut ctx.heap[id];
 
                sync_stmt.end_sync = end_sync;
 
            } else if ident == KW_STMT_FORK {
 
                let id = self.consume_fork_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 

	
 
                let end_fork = ctx.heap.alloc_end_fork_statement(|this| EndForkStatement{
 
                    this,
 
                    start_fork: id,
 
                    next: StatementId::new_invalid(),
 
                });
 
                section.push(end_fork.upcast());
 

	
 
                let fork_stmt = &mut ctx.heap[id];
 
                fork_stmt.end_fork = end_fork;
 
            } else if ident == KW_STMT_RETURN {
 
                let id = self.consume_return_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_GOTO {
 
                let id = self.consume_goto_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_NEW {
 
                let id = self.consume_new_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_CHANNEL {
 
                let id = self.consume_channel_statement(module, iter, ctx)?;
 
                section.push(id.upcast().upcast());
 
            } else if iter.peek() == Some(TokenKind::Colon) {
 
                self.consume_labeled_statement(module, iter, ctx, section)?;
 
            } else {
 
                // Two fallback possibilities: the first one is a memory
 
                // declaration, the other one is to parse it as a regular
 
                // expression. This is a bit ugly
 
                // declaration, the other one is to parse it as a normal
 
                // expression. This is a bit ugly.
 
                if let Some((memory_stmt_id, assignment_stmt_id)) = self.maybe_consume_memory_statement(module, iter, ctx)? {
 
                    section.push(memory_stmt_id.upcast().upcast());
 
                    section.push(assignment_stmt_id.upcast());
 
                } else {
 
                    let id = self.consume_expression_statement(module, iter, ctx)?;
 
                    section.push(id.upcast());
 
                }
 
            }
 
        } else {
 
            let id = self.consume_expression_statement(module, iter, ctx)?;
 
            section.push(id.upcast());
 
        }
 

	
 
        return Ok(());
 
    }
 

	
 
    fn consume_block_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let open_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?;
 
        self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin)
 
    }
 

	
 
    fn consume_block_statement_without_leading_curly(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, open_curly_pos: InputPosition
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let mut stmt_section = self.statements.start_section();
 
        let mut next = iter.next();
 
        while next != Some(TokenKind::CloseCurly) {
 
            if next.is_none() {
 
                return Err(ParseError::new_error_str_at_pos(
 
                    &module.source, iter.last_valid_pos(), "expected a statement or '}'"
 
                ));
 
            }
 
            self.consume_statement(module, iter, ctx, &mut stmt_section)?;
 
            next = iter.next();
 
        }
 

	
 
        let statements = stmt_section.into_vec();
 
        let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?;
 
        block_span.begin = open_curly_pos;
 

	
 
        let id = ctx.heap.alloc_block_statement(|this| BlockStatement{
 
            this,
 
            is_implicit: false,
 
            span: block_span,
 
            statements,
 
            end_block: EndBlockStatementId::new_invalid(),
 
@@ -1664,97 +1664,97 @@ impl PassDefinitions {
 
            let right = higher_precedence_fn(self, module, iter, ctx)?;
 

	
 
            let full_span = InputSpan::from_positions(
 
                ctx.heap[left].full_span().begin,
 
                ctx.heap[right].full_span().end,
 
            );
 

	
 
            result = ctx.heap.alloc_binary_expression(|this| BinaryExpression{
 
                this, operator_span, full_span, left, operation, right,
 
                parent: ExpressionParent::None,
 
                unique_id_in_definition: -1,
 
            }).upcast();
 
        }
 

	
 
        Ok(result)
 
    }
 

	
 
    #[inline]
 
    fn consume_expression_list(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, end_pos: Option<&mut InputPosition>
 
    ) -> Result<Vec<ExpressionId>, ParseError> {
 
        let mut section = self.expressions.start_section();
 
        consume_comma_separated(
 
            TokenKind::OpenParen, TokenKind::CloseParen, &module.source, iter, ctx,
 
            |_source, iter, ctx| self.consume_expression(module, iter, ctx),
 
            &mut section, "an expression", "a list of expressions", end_pos
 
        )?;
 
        Ok(section.into_vec())
 
    }
 
}
 

	
 
/// Consumes a type. A type always starts with an identifier which may indicate
 
/// a builtin type or a user-defined type. The fact that it may contain
 
/// polymorphic arguments makes it a tree-like structure. Because we cannot rely
 
/// on knowing the exact number of polymorphic arguments we do not check for
 
/// these.
 
///
 
/// Note that the first depth index is used as a hack.
 
// TODO: @Optimize, @Cleanup
 
fn consume_parser_type(
 
    source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
    cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, first_angle_depth: i32,
 
) -> Result<ParserType, ParseError> {
 
    struct Entry{
 
        element: ParserTypeElement,
 
        depth: i32,
 
    }
 

	
 
    // After parsing the array modified "[]", we need to insert an array type
 
    // After parsing the array modifier "[]", we need to insert an array type
 
    // before the most recently parsed type.
 
    fn insert_array_before(elements: &mut Vec<Entry>, depth: i32, span: InputSpan) {
 
        let index = elements.iter().rposition(|e| e.depth == depth).unwrap();
 
        let num_embedded = elements[index].element.variant.num_embedded();
 
        elements.insert(index, Entry{
 
            element: ParserTypeElement{ element_span: span, variant: ParserTypeVariant::Array },
 
            depth,
 
        });
 

	
 
        // Now the original element, and all of its children, should have their
 
        // depth incremented by 1
 
        elements[index + 1].depth += 1;
 
        if num_embedded != 0 {
 
            for idx in index + 2..elements.len() {
 
                let element = &mut elements[idx];
 
                if element.depth >= depth + 1 {
 
                    element.depth += 1;
 
                } else {
 
                    break;
 
                }
 
            }
 
        }
 
    }
 

	
 
    // Most common case we just have one type, perhaps with some array
 
    // annotations. This is both the hot-path, and simplifies the state machine
 
    // that follows and is responsible for parsing more complicated types.
 
    let element = consume_parser_type_ident(
 
        source, iter, symbols, heap, poly_vars, cur_scope,
 
        wrapping_definition, allow_inference
 
    )?;
 

	
 
    if iter.next() != Some(TokenKind::OpenAngle) {
 
        let num_embedded = element.variant.num_embedded();
 
        let first_pos = element.element_span.begin;
 
        let mut last_pos = element.element_span.end;
 
        let mut elements = Vec::with_capacity(num_embedded + 2); // type itself + embedded + 1 (maybe) array type
 

	
 
        // Consume any potential array elements
 
        while iter.next() == Some(TokenKind::OpenSquare) {
 
            let mut array_span = iter.next_span();
 
            iter.consume();
 

	
 
            let end_span = iter.next_span();
 
            array_span.end = end_span.end;
 
            consume_token(source, iter, TokenKind::CloseSquare)?;
 

	
 
            last_pos = end_span.end;
src/protocol/parser/pass_definitions_types.rs
Show inline comments
 
new file 100644
 
use crate::protocol::*;
 
use crate::protocol::parser::*;
 

	
 
struct Entry {
 
    element: ParserTypeElement,
 
    depth: i32,
 
}
 

	
 
#[derive(Copy, Clone)]
 
enum DepthKind {
 
    Tuple, // because we had a `(` token
 
    PolyArgs, // because we had a `<` token
 
}
 

	
 
struct DepthElement {
 
    kind: DepthKind,
 
    pos: InputPosition,
 
}
 

	
 
/// Temporarily keep the error around in unevaluated format because sometimes
 
/// when we evaluate a `ParserType`, we perform some backtracking if it fails.
 
enum ParserTypeError {
 
    ExpectedAType(InputPosition),
 
    MessageAt(InputPosition, &'static str),
 
    TwoMessageAt(InputPosition, &'static str, InputPosition, &'static str),
 
}
 

	
 
/// Parsers tokens into `ParserType` instances (yes, the name of the struct is
 
/// silly). Essentially a little state machine with its own temporary storage.
 
pub(crate) struct ParserTypeParser {
 
    entries: Vec<Entry>,
 
    depths: Vec<DepthElement>,
 
    first_pos: InputPosition,
 
    last_pos: InputPosition,
 
}
 

	
 
impl ParserTypeParser {
 
    pub(crate) fn consume_parser_type(
 
        &mut self,
 
        source: &InputSource, iter: &mut TokenIter,
 
        symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
        cur_scope: SymbolScope, wrapping_definition: DefinitionId,
 
        allow_inference: bool, inside_angular_bracket: Option<InputPosition>,
 
    ) -> Result<ParserType, ParserTypeError> {
 
        // Make sure we start in an empty state
 
        debug_assert!(self.entries.is_empty());
 
        debug_assert!(self.depths.is_empty());
 

	
 
        // Setup processing
 
        if let Some(bracket_pos) = inside_angular_bracket {
 
            self.push_depth(DepthKind::PolyArgs, bracket_pos);
 
        }
 

	
 
        let first_element = match iter.next() {
 
            Some(TokenKind::Ident) => {
 

	
 
            },
 
            Some(TokenKind::OpenParen) => {
 
                let tuple_start_pos = iter.next_start_position();
 
                self.push_depth(DepthKind::Tuple, tuple_start_pos);
 
                self.entries.push(Entry{
 
                    element: ParserTypeElement{
 
                        element_span: InputSpan::from_positions(tuple_start_pos, tuple_start_pos),
 
                        variant: ParserTypeVariant::Tuple(0),
 
                    },
 
                    depth: self.cur_depth(),
 
                });
 
                iter.consume();
 
            },
 
            _ => return Err(ParserTypeError::MessageAt(iter.last_valid_pos(), "expected a type")),
 
        };
 

	
 
        // Convert the results from parsing into the `ParserType`
 
        let mut elements = Vec::with_capacity(self.entries.len());
 
        debug_assert!(!self.entries.is_empty());
 

	
 
        for entry in self.entries.drain(..) {
 
            elements.push(entry.element)
 
        }
 

	
 
        return Ok(ParserType{
 
            elements,
 
            full_span: InputSpan::from_positions(self.first_pos, self.last_pos),
 
        });
 
    }
 

	
 
    /// Consumes an identifier that should resolve to some kind of type. There
 
    /// may be trailing '::' tokens, commas, or polymorphic arguments.
 
    fn consume_parser_type_ident(
 
        &self, iter: &mut TokenIter, symbols: &SymbolTable, poly_vars: &[Identifier],
 
        allow_inference: bool,
 
    ) -> Result<ParserTypeElement, ParserTypeError> {
 

	
 
    }
 

	
 
    #[inline]
 
    fn push_depth(&mut self, kind: DepthKind, pos: InputPosition) {
 
        self.depths.push(DepthElement{ kind, pos });
 
    }
 

	
 
    #[inline]
 
    fn pop_depth(&mut self, kind: DepthKind, pos: InputPosition) -> Result<(), ParserTypeError> {
 
        if self.depths.is_empty() {
 
            // More closing parens than opening ones
 
            let message = match kind {
 
                DepthKind::Tuple => "unmatched ')'",
 
                DepthKind::PolyArgs => "unmatched '>'",
 
            };
 
            return Err(ParserTypeError::MessageAt(pos, message))
 
        }
 

	
 
        let last = *self.depths.last().unwrap();
 
        if last != kind {
 
            // Wrong kind of paren
 
            let (message_second) = match kind {
 
                DepthKind::Tuple => "unexpected closing"
 
            }
 
        }
 
    }
 

	
 
    #[inline]
 
    fn cur_depth(&self) -> i32 {
 
        return self.depths.len() as i32;
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/tokens.rs
Show inline comments
 
@@ -253,91 +253,98 @@ impl<'a> TokenIter<'a> {
 
        let token = &self.tokens[self.cur];
 
        Some(token.kind)
 
    }
 

	
 
    /// Returns the next token (but skips over comments), or `None` if at the
 
    /// end of the range
 
    pub(crate) fn next(&mut self) -> Option<TokenKind> {
 
        while let Some(token_kind) = self.next_including_comments() {
 
            if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment {
 
                return Some(token_kind);
 
            }
 
            self.consume();
 
        }
 

	
 
        return None
 
    }
 

	
 
    /// Peeks ahead by one token (i.e. the one that comes after `next()`), and
 
    /// skips over comments
 
    pub(crate) fn peek(&self) -> Option<TokenKind> {
 
        for next_idx in self.cur + 1..self.end {
 
            let next_kind = self.tokens[next_idx].kind;
 
            if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd {
 
                return Some(next_kind);
 
            }
 
        }
 

	
 
        return None;
 
    }
 

	
 
    /// Returns the start position belonging to the token returned by `next`. If
 
    /// there is not a next token, then we return the end position of the
 
    /// previous token.
 
    pub(crate) fn last_valid_pos(&self) -> InputPosition {
 
        if self.cur < self.end {
 
            // Return token position
 
            return self.tokens[self.cur].pos
 
        }
 

	
 
        // Return previous token end
 
        let token = &self.tokens[self.cur - 1];
 
        return if token.kind == TokenKind::SpanEnd {
 
            token.pos
 
        } else {
 
            token.pos.with_offset(token.kind.num_characters())
 
        };
 
    }
 

	
 
    /// Assumes the token is not at the end and returns the starting position
 
    /// belonging to the token returned by `next`.
 
    pub(crate) fn next_start_position(&self) -> InputPosition {
 
        debug_assert!(self.cur < self.end);
 
        return self.tokens[self.cur].pos;
 
    }
 

	
 
    /// Returns the token range belonging to the token returned by `next`. This
 
    /// assumes that we're not at the end of the range we're iterating over.
 
    pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
 
        debug_assert!(self.cur < self.end);
 
        let token = &self.tokens[self.cur];
 
        if token.kind.has_span_end() {
 
            let span_end = &self.tokens[self.cur + 1];
 
            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
 
            (token.pos, span_end.pos)
 
        } else {
 
            let offset = token.kind.num_characters();
 
            (token.pos, token.pos.with_offset(offset))
 
        }
 
    }
 

	
 
    /// See `next_positions`
 
    pub(crate) fn next_span(&self) -> InputSpan {
 
        let (begin, end) = self.next_positions();
 
        return InputSpan::from_positions(begin, end)
 
    }
 

	
 
    /// Advances the iterator to the next (meaningful) token.
 
    pub(crate) fn consume(&mut self) {
 
        if let Some(kind) = self.next_including_comments() {
 
            if kind.has_span_end() {
 
                self.cur += 2;
 
            } else {
 
                self.cur += 1;
 
            }
 
        }
 
    }
 

	
 
    /// Saves the current iteration position, may be passed to `load` to return
 
    /// the iterator to a previous position.
 
    pub(crate) fn save(&self) -> (usize, usize) {
 
        (self.cur, self.end)
 
    }
 

	
 
    pub(crate) fn load(&mut self, saved: (usize, usize)) {
 
        self.cur = saved.0;
 
        self.end = saved.1;
 
    }
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)