Changeset - 61bb6b04e20b
[Not reviewed]
0 4 1
MH - 4 years ago 2021-12-10 00:04:00
contact@maxhenger.nl
Refactoring ParserType parser in anticipation of tuples
5 files changed with 141 insertions and 4 deletions:
0 comments (0 inline, 0 general)
src/protocol/ast.rs
Show inline comments
 
@@ -351,68 +351,72 @@ impl Display for Identifier {
 
}
 

	
 
#[derive(Debug, Clone, PartialEq, Eq)]
 
pub enum ParserTypeVariant {
 
    // Special builtin, only usable by the compiler and not constructable by the
 
    // programmer
 
    Void,
 
    InputOrOutput,
 
    ArrayLike,
 
    IntegerLike,
 
    // Basic builtin
 
    Message,
 
    Bool,
 
    UInt8, UInt16, UInt32, UInt64,
 
    SInt8, SInt16, SInt32, SInt64,
 
    Character, String,
 
    // Literals (need to get concrete builtin type during typechecking)
 
    IntegerLiteral,
 
    // Marker for inference
 
    Inferred,
 
    // Builtins expecting one subsequent type
 
    Array,
 
    Input,
 
    Output,
 
    // Tuple: expecting any number of elements. Note that the parser type can
 
    // have one-valued tuples, these will be filtered out later during type
 
    // checking.
 
    Tuple(u32), // u32 = number of subsequent types
 
    // User-defined types
 
    PolymorphicArgument(DefinitionId, u32), // u32 = index into polymorphic variables
 
    Definition(DefinitionId, u32), // u32 = number of subsequent types in the type tree.
 
}
 

	
 
impl ParserTypeVariant {
 
    pub(crate) fn num_embedded(&self) -> usize {
 
        use ParserTypeVariant::*;
 

	
 
        match self {
 
            Void | IntegerLike |
 
            Message | Bool |
 
            UInt8 | UInt16 | UInt32 | UInt64 |
 
            SInt8 | SInt16 | SInt32 | SInt64 |
 
            Character | String | IntegerLiteral |
 
            Inferred | PolymorphicArgument(_, _) =>
 
                0,
 
            ArrayLike | InputOrOutput | Array | Input | Output =>
 
                1,
 
            Definition(_, num) => *num as usize,
 
            Definition(_, num) | Tuple(num) => *num as usize,
 
        }
 
    }
 
}
 

	
 
/// ParserTypeElement is an element of the type tree. An element may be
 
/// implicit, meaning that the user didn't specify the type, but it was set by
 
/// the compiler.
 
#[derive(Debug, Clone)]
 
pub struct ParserTypeElement {
 
    pub element_span: InputSpan, // span of this element, not including the child types
 
    pub variant: ParserTypeVariant,
 
}
 

	
 
/// ParserType is a specification of a type during the parsing phase and initial
 
/// linker/validator phase of the compilation process. These types may be
 
/// (partially) inferred or represent literals (e.g. a integer whose bytesize is
 
/// not yet determined).
 
///
 
/// Its contents are the depth-first serialization of the type tree. Each node
 
/// is a type that may accept polymorphic arguments. The polymorphic arguments
 
/// are then the children of the node.
 
#[derive(Debug, Clone)]
 
pub struct ParserType {
 
    pub elements: Vec<ParserTypeElement>,
src/protocol/parser/mod.rs
Show inline comments
 
pub(crate) mod symbol_table;
 
pub(crate) mod type_table;
 
pub(crate) mod tokens;
 
pub(crate) mod token_parsing;
 
pub(crate) mod pass_tokenizer;
 
pub(crate) mod pass_symbols;
 
pub(crate) mod pass_imports;
 
pub(crate) mod pass_definitions;
 
pub(crate) mod pass_definitions_types;
 
pub(crate) mod pass_validation_linking;
 
pub(crate) mod pass_typing;
 
mod visitor;
 

	
 
use tokens::*;
 
use crate::collections::*;
 
use visitor::Visitor;
 
use pass_tokenizer::PassTokenizer;
 
use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use symbol_table::*;
 
use type_table::TypeTable;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_printer::ASTWriter;
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Tokenized,              // source is tokenized
src/protocol/parser/pass_definitions.rs
Show inline comments
 
@@ -439,50 +439,50 @@ impl PassDefinitions {
 
                    this,
 
                    start_fork: id,
 
                    next: StatementId::new_invalid(),
 
                });
 
                section.push(end_fork.upcast());
 

	
 
                let fork_stmt = &mut ctx.heap[id];
 
                fork_stmt.end_fork = end_fork;
 
            } else if ident == KW_STMT_RETURN {
 
                let id = self.consume_return_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_GOTO {
 
                let id = self.consume_goto_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_NEW {
 
                let id = self.consume_new_statement(module, iter, ctx)?;
 
                section.push(id.upcast());
 
            } else if ident == KW_STMT_CHANNEL {
 
                let id = self.consume_channel_statement(module, iter, ctx)?;
 
                section.push(id.upcast().upcast());
 
            } else if iter.peek() == Some(TokenKind::Colon) {
 
                self.consume_labeled_statement(module, iter, ctx, section)?;
 
            } else {
 
                // Two fallback possibilities: the first one is a memory
 
                // declaration, the other one is to parse it as a regular
 
                // expression. This is a bit ugly
 
                // declaration, the other one is to parse it as a normal
 
                // expression. This is a bit ugly.
 
                if let Some((memory_stmt_id, assignment_stmt_id)) = self.maybe_consume_memory_statement(module, iter, ctx)? {
 
                    section.push(memory_stmt_id.upcast().upcast());
 
                    section.push(assignment_stmt_id.upcast());
 
                } else {
 
                    let id = self.consume_expression_statement(module, iter, ctx)?;
 
                    section.push(id.upcast());
 
                }
 
            }
 
        } else {
 
            let id = self.consume_expression_statement(module, iter, ctx)?;
 
            section.push(id.upcast());
 
        }
 

	
 
        return Ok(());
 
    }
 

	
 
    fn consume_block_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let open_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?;
 
        self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin)
 
    }
 

	
 
    fn consume_block_statement_without_leading_curly(
 
@@ -1688,49 +1688,49 @@ impl PassDefinitions {
 
            |_source, iter, ctx| self.consume_expression(module, iter, ctx),
 
            &mut section, "an expression", "a list of expressions", end_pos
 
        )?;
 
        Ok(section.into_vec())
 
    }
 
}
 

	
 
/// Consumes a type. A type always starts with an identifier which may indicate
 
/// a builtin type or a user-defined type. The fact that it may contain
 
/// polymorphic arguments makes it a tree-like structure. Because we cannot rely
 
/// on knowing the exact number of polymorphic arguments we do not check for
 
/// these.
 
///
 
/// Note that the first depth index is used as a hack.
 
// TODO: @Optimize, @Cleanup
 
fn consume_parser_type(
 
    source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
    cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, first_angle_depth: i32,
 
) -> Result<ParserType, ParseError> {
 
    struct Entry{
 
        element: ParserTypeElement,
 
        depth: i32,
 
    }
 

	
 
    // After parsing the array modified "[]", we need to insert an array type
 
    // After parsing the array modifier "[]", we need to insert an array type
 
    // before the most recently parsed type.
 
    fn insert_array_before(elements: &mut Vec<Entry>, depth: i32, span: InputSpan) {
 
        let index = elements.iter().rposition(|e| e.depth == depth).unwrap();
 
        let num_embedded = elements[index].element.variant.num_embedded();
 
        elements.insert(index, Entry{
 
            element: ParserTypeElement{ element_span: span, variant: ParserTypeVariant::Array },
 
            depth,
 
        });
 

	
 
        // Now the original element, and all of its children, should have their
 
        // depth incremented by 1
 
        elements[index + 1].depth += 1;
 
        if num_embedded != 0 {
 
            for idx in index + 2..elements.len() {
 
                let element = &mut elements[idx];
 
                if element.depth >= depth + 1 {
 
                    element.depth += 1;
 
                } else {
 
                    break;
 
                }
 
            }
 
        }
 
    }
 

	
src/protocol/parser/pass_definitions_types.rs
Show inline comments
 
new file 100644
 
use crate::protocol::*;
 
use crate::protocol::parser::*;
 

	
 
struct Entry {
 
    element: ParserTypeElement,
 
    depth: i32,
 
}
 

	
 
#[derive(Copy, Clone)]
 
enum DepthKind {
 
    Tuple, // because we had a `(` token
 
    PolyArgs, // because we had a `<` token
 
}
 

	
 
struct DepthElement {
 
    kind: DepthKind,
 
    pos: InputPosition,
 
}
 

	
 
/// Temporarily keep the error around in unevaluated format because sometimes
 
/// when we evaluate a `ParserType`, we perform some backtracking if it fails.
 
enum ParserTypeError {
 
    ExpectedAType(InputPosition),
 
    MessageAt(InputPosition, &'static str),
 
    TwoMessageAt(InputPosition, &'static str, InputPosition, &'static str),
 
}
 

	
 
/// Parsers tokens into `ParserType` instances (yes, the name of the struct is
 
/// silly). Essentially a little state machine with its own temporary storage.
 
pub(crate) struct ParserTypeParser {
 
    entries: Vec<Entry>,
 
    depths: Vec<DepthElement>,
 
    first_pos: InputPosition,
 
    last_pos: InputPosition,
 
}
 

	
 
impl ParserTypeParser {
 
    pub(crate) fn consume_parser_type(
 
        &mut self,
 
        source: &InputSource, iter: &mut TokenIter,
 
        symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
        cur_scope: SymbolScope, wrapping_definition: DefinitionId,
 
        allow_inference: bool, inside_angular_bracket: Option<InputPosition>,
 
    ) -> Result<ParserType, ParserTypeError> {
 
        // Make sure we start in an empty state
 
        debug_assert!(self.entries.is_empty());
 
        debug_assert!(self.depths.is_empty());
 

	
 
        // Setup processing
 
        if let Some(bracket_pos) = inside_angular_bracket {
 
            self.push_depth(DepthKind::PolyArgs, bracket_pos);
 
        }
 

	
 
        let first_element = match iter.next() {
 
            Some(TokenKind::Ident) => {
 

	
 
            },
 
            Some(TokenKind::OpenParen) => {
 
                let tuple_start_pos = iter.next_start_position();
 
                self.push_depth(DepthKind::Tuple, tuple_start_pos);
 
                self.entries.push(Entry{
 
                    element: ParserTypeElement{
 
                        element_span: InputSpan::from_positions(tuple_start_pos, tuple_start_pos),
 
                        variant: ParserTypeVariant::Tuple(0),
 
                    },
 
                    depth: self.cur_depth(),
 
                });
 
                iter.consume();
 
            },
 
            _ => return Err(ParserTypeError::MessageAt(iter.last_valid_pos(), "expected a type")),
 
        };
 

	
 
        // Convert the results from parsing into the `ParserType`
 
        let mut elements = Vec::with_capacity(self.entries.len());
 
        debug_assert!(!self.entries.is_empty());
 

	
 
        for entry in self.entries.drain(..) {
 
            elements.push(entry.element)
 
        }
 

	
 
        return Ok(ParserType{
 
            elements,
 
            full_span: InputSpan::from_positions(self.first_pos, self.last_pos),
 
        });
 
    }
 

	
 
    /// Consumes an identifier that should resolve to some kind of type. There
 
    /// may be trailing '::' tokens, commas, or polymorphic arguments.
 
    fn consume_parser_type_ident(
 
        &self, iter: &mut TokenIter, symbols: &SymbolTable, poly_vars: &[Identifier],
 
        allow_inference: bool,
 
    ) -> Result<ParserTypeElement, ParserTypeError> {
 

	
 
    }
 

	
 
    #[inline]
 
    fn push_depth(&mut self, kind: DepthKind, pos: InputPosition) {
 
        self.depths.push(DepthElement{ kind, pos });
 
    }
 

	
 
    #[inline]
 
    fn pop_depth(&mut self, kind: DepthKind, pos: InputPosition) -> Result<(), ParserTypeError> {
 
        if self.depths.is_empty() {
 
            // More closing parens than opening ones
 
            let message = match kind {
 
                DepthKind::Tuple => "unmatched ')'",
 
                DepthKind::PolyArgs => "unmatched '>'",
 
            };
 
            return Err(ParserTypeError::MessageAt(pos, message))
 
        }
 

	
 
        let last = *self.depths.last().unwrap();
 
        if last != kind {
 
            // Wrong kind of paren
 
            let (message_second) = match kind {
 
                DepthKind::Tuple => "unexpected closing"
 
            }
 
        }
 
    }
 

	
 
    #[inline]
 
    fn cur_depth(&self) -> i32 {
 
        return self.depths.len() as i32;
 
    }
 
}
 
\ No newline at end of file
src/protocol/parser/tokens.rs
Show inline comments
 
@@ -277,48 +277,55 @@ impl<'a> TokenIter<'a> {
 
            }
 
        }
 

	
 
        return None;
 
    }
 

	
 
    /// Returns the start position belonging to the token returned by `next`. If
 
    /// there is not a next token, then we return the end position of the
 
    /// previous token.
 
    pub(crate) fn last_valid_pos(&self) -> InputPosition {
 
        if self.cur < self.end {
 
            // Return token position
 
            return self.tokens[self.cur].pos
 
        }
 

	
 
        // Return previous token end
 
        let token = &self.tokens[self.cur - 1];
 
        return if token.kind == TokenKind::SpanEnd {
 
            token.pos
 
        } else {
 
            token.pos.with_offset(token.kind.num_characters())
 
        };
 
    }
 

	
 
    /// Assumes the token is not at the end and returns the starting position
 
    /// belonging to the token returned by `next`.
 
    pub(crate) fn next_start_position(&self) -> InputPosition {
 
        debug_assert!(self.cur < self.end);
 
        return self.tokens[self.cur].pos;
 
    }
 

	
 
    /// Returns the token range belonging to the token returned by `next`. This
 
    /// assumes that we're not at the end of the range we're iterating over.
 
    pub(crate) fn next_positions(&self) -> (InputPosition, InputPosition) {
 
        debug_assert!(self.cur < self.end);
 
        let token = &self.tokens[self.cur];
 
        if token.kind.has_span_end() {
 
            let span_end = &self.tokens[self.cur + 1];
 
            debug_assert_eq!(span_end.kind, TokenKind::SpanEnd);
 
            (token.pos, span_end.pos)
 
        } else {
 
            let offset = token.kind.num_characters();
 
            (token.pos, token.pos.with_offset(offset))
 
        }
 
    }
 

	
 
    /// See `next_positions`
 
    pub(crate) fn next_span(&self) -> InputSpan {
 
        let (begin, end) = self.next_positions();
 
        return InputSpan::from_positions(begin, end)
 
    }
 

	
 
    /// Advances the iterator to the next (meaningful) token.
 
    pub(crate) fn consume(&mut self) {
 
        if let Some(kind) = self.next_including_comments() {
0 comments (0 inline, 0 general)