Changeset - a52ba09256ad
[Not reviewed]
0 2 0
MH - 4 years ago 2021-04-23 10:26:53
henger@cwi.nl
WIP on compiler rearchitecting
2 files changed with 102 insertions and 8 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/pass_definitions.rs
Show inline comments
 
use crate::protocol::ast::*;
 
use super::symbol_table2::*;
 
use super::{Module, ModuleCompilationPhase, PassCtx};
 
use super::tokens::*;
 
use super::token_parsing::*;
 
use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as InputPosition, InputSpan, ParseError};
 
use crate::collections::*;
 

	
 
/// Parses all the tokenized definitions into actual AST nodes.
 
pub(crate) struct PassDefinitions {
 
    buffer: String,
 
    identifiers: Vec<Identifier>,
 
    struct_fields: Vec<StructFieldDefinition>,
 
    enum_variants: Vec<EnumVariantDefinition>,
 
    union_variants: Vec<UnionVariantDefinition>,
 
    parameters: Vec<ParameterId>,
 
    expressions: ScopedBuffer<ExpressionId>,
 
    parser_types: Vec<ParserType>,
 
}
 

	
 
impl PassDefinitions {
 
    pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let module_range = &module.tokens.ranges[0];
 
        debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved);
 
        debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module);
 

	
 
        // TODO: Very important to go through ALL ranges of the module so that we parse the entire
 
        //  input source. Only skip the ones we're certain we've handled before.
 
        let mut range_idx = module_range.first_child_idx;
 
        loop {
 
            let range_idx_usize = range_idx as usize;
 
            let cur_range = &module.tokens.ranges[range_idx_usize];
 

	
 
            if cur_range.range_kind == TokenRangeKind::Definition {
 
                self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?;
 
            }
 

	
 
            match cur_range.next_sibling_idx {
 
                Some(idx) => { range_idx = idx; },
 
                None => { break; },
 
            }
 
        }
 

	
 

	
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_definition_range(
 
        &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
 
    ) -> Result<(), ParseError> {
 
        let module = &modules[module_idx];
 
        let cur_range = &module.tokens.ranges[range_idx];
 
        debug_assert_eq!(cur_range.range_kind, TokenRangeKind::Definition);
 

	
 
        // Detect which definition we're parsing
 
        let mut iter = module.tokens.iter_range(cur_range);
 
        let keyword = peek_ident(&module.source, &mut iter).unwrap();
 
        match keyword {
 
            KW_STRUCT => {
 

	
 
            },
 
            KW_ENUM => {
 

	
 
            },
 
            KW_UNION => {
 

	
 
            },
 
            KW_FUNCTION => {
 

	
 
            },
 
            KW_PRIMITIVE => {
 

	
 
            },
 
            KW_COMPOSITE => {
 

	
 
            },
 
            _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(keyword)),
 
        };
 

	
 
        Ok(())
 
    }
 

	
 
    // TODO: @Cleanup, still not sure about polymorphic variable parsing. Pre-parsing the variables
 
    //  allows us to directly construct proper ParserType trees. But this does require two lookups
 
    //  of the corresponding definition.
 
    fn visit_struct_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_STRUCT)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        let poly_vars = ctx.heap[definition_id].poly_vars();
 

	
 
        // Parse struct definition
 
        consume_polymorphic_vars_spilled(source, iter)?;
 
        debug_assert!(self.struct_fields.is_empty());
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter,
 
            |source, iter| {
 
                let parser_type = consume_parser_type(
 
                    source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false
 
                )?;
 
                let field = consume_ident_interned(source, iter, ctx)?;
 
                Ok(StructFieldDefinition{ field, parser_type })
 
            },
 
            &mut self.struct_fields, "a struct field", "a list of struct fields"
 
        )?;
 

	
 
        // Transfer to preallocated definition
 
        let struct_def = ctx.heap[definition_id].as_struct_mut();
 
        struct_def.fields.clone_from(&self.struct_fields);
 
        self.struct_fields.clear();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_enum_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_ENUM)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        let poly_vars = ctx.heap[definition_id].poly_vars();
 

	
 
        // Parse enum definition
 
        consume_polymorphic_vars_spilled(source, iter)?;
 
        debug_assert!(self.enum_variants.is_empty());
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter,
 
            |source, iter| {
 
                let identifier = consume_ident_interned(source, iter, ctx)?;
 
                let value = if iter.next() == Some(TokenKind::Equal) {
 
                    iter.consume();
 
                    let (variant_number, _) = consume_integer_literal(source, iter, &mut self.buffer)?;
 
                    EnumVariantValue::Integer(variant_number as i64) // TODO: @int
 
                } else {
 
                    EnumVariantValue::None
 
                };
 
                Ok(EnumVariantDefinition{ identifier, value })
 
            },
 
            &mut self.enum_variants, "an enum variant", "a list of enum variants"
 
        )?;
 

	
 
        // Transfer to definition
 
        let enum_def = ctx.heap[definition_id].as_enum_mut();
 
        enum_def.variants.clone_from(&self.enum_variants);
 
        self.enum_variants.clear();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_union_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_UNION)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        let poly_vars = ctx.heap[definition_id].poly_vars();
 

	
 
        // Parse union definition
 
        consume_polymorphic_vars_spilled(source, iter)?;
 
        debug_assert!(self.union_variants.is_empty());
 
        consume_comma_separated(
 
            TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter,
 
            |source, iter| {
 
                let identifier = consume_ident_interned(source, iter, ctx)?;
 
                let close_pos = identifier.span.end;
 
                let has_embedded = maybe_consume_comma_separated(
 
                    TokenKind::OpenParen, TokenKind::CloseParen, source, iter,
 
                    |source, iter| {
 
                        consume_parser_type(
 
                            source, iter, &ctx.symbols, &ctx.heap, poly_vars,
 
                            module_scope, definition_id, false
 
                        )
 
                    },
 
                    &mut self.parser_types, "an embedded type", Some(&mut close_pos)
 
                )?;
 
                let value = if has_embedded {
 
                    UnionVariantValue::Embedded(self.parser_types.clone())
 
                } else {
 
                    UnionVariantValue::None
 
                };
 
                self.parser_types.clear();
 

	
 
                Ok(UnionVariantDefinition{
 
                    span: InputSpan::from_positions(identifier.span.begin, close_pos),
 
                    identifier,
 
                    value
 
                })
 
            },
 
            &mut self.union_variants, "a union variant", "a list of union variants", None
 
        )?;
 

	
 
        // Transfer to AST
 
        let union_def = ctx.heap[definition_id].as_union_mut();
 
        union_def.variants.clone_from(&self.union_variants);
 
        self.union_variants.clear();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_function_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        let poly_vars = ctx.heap[definition_id].poly_vars();
 

	
 
        // Parse function's argument list
 
        consume_parameter_list(
 
            source, iter, ctx, &mut self.parameters, poly_vars, module_scope, definition_id
 
        )?;
 
        let parameters = self.parameters.clone();
 
        self.parameters.clear();
 

	
 
        // Consume return types
 
        consume_comma_separated(
 
            TokenKind::ArrowRight, TokenKind::OpenCurly, &module.source, iter,
 
            |source, iter| {
 
                consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false)
 
            },
 
            &mut self.parser_types, "a return type", "the return types", None
 
        )?;
 
        let return_types = self.parser_types.clone();
 
        self.parser_types.clear();
 

	
 
        // Consume block
 
    }
 

	
 
    fn consume_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<StatementId, ParseError> {
 
        let next = iter.next().expect("consume_statement has a next token");
 

	
 
        if next == TokenKind::OpenCurly {
 
            return self.consume_block_statement(module, iter, ctx)?.upcast();
 
        } else if next == TokenKind::Ident {
 
            let (ident, _) = consume_any_ident(source, iter)?;
 
            if ident == KW_STMT_IF {
 
                return self.consume_if_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_WHILE {
 
                return self.consume_while_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_BREAK {
 
                return self.consume_break_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_CONTINUE {
 
                return self.consume_continue_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_SYNC {
 
                return self.consume_synchronous_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_RETURN {
 
                return self.consume_return_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_ASSERT {
 
                // TODO: Unify all builtin function calls as expressions
 
                return self.consume_assert_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_GOTO {
 
                return self.consume_goto_statement(module, iter, ctx)?;
 
            } else if ident == KW_STMT_NEW {
 
                return self.consume_new_statement(module, iter, ctx)?;
 
            } else if iter.peek() == Some(TokenKind::Colon) {
 
                return self.consume_labeled_statement(module, iter, ctx)?;
 
            }
 
        }
 

	
 
        // If here then attempt to parse as expression
 
        return self.consume_expr_statement(module, iter, ctx)?;
 
    }
 

	
 
    fn consume_block_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let open_span = consume_token(source, iter, TokenKind::OpenCurly)?;
 
        self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin)
 
    }
 

	
 
    fn consume_block_statement_without_leading_curly(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, open_curly_pos: InputPosition
 
    ) -> Result<BlockStatementId, ParseError> {
 
        let mut statements = Vec::new();
 
        let mut next = iter.next();
 
        while next.is_some() && next != Some(TokenKind::CloseCurly) {
 

	
 
        }
 

	
 
        let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?;
 
        block_span.begin = open_curly_pos;
 

	
 
        Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{
 
            this,
 
            span: block_span,
 
            statements,
 
            parent_scope: None,
 
            relative_pos_in_parent: 0,
 
            locals: Vec::new(),
 
            labels: Vec::new(),
 
        }))
 
    }
 

	
 
    fn consume_if_statement(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<IfStatementId, ParseError> {
 
        consume_exact_ident(&module.source, iter, KW_STMT_IF)?;
 
        let test = consume_parenthesized_expression()
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Expression Parsing
 
    //--------------------------------------------------------------------------
 

	
 
    fn consume_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_assignment_expression(module, iter, ctx)
 
    }
 

	
 
    fn consume_assignment_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        // Utility to convert token into assignment operator
 
        fn parse_assignment_operator(token: Option<TokenKind>) -> Option<AssignmentOperator> {
 
            use TokenKind as TK;
 
            use AssignmentOperator as AO;
 

	
 
            if token.is_none() {
 
                return None
 
            }
 

	
 
            let matched = match token.unwrap() {
 
                TK::Equal               => Some(AO::Set),
 
                TK::StarEquals          => Some(AO::Multiplied),
 
                TK::SlashEquals         => Some(AO::Divided),
 
                TK::PercentEquals       => Some(AO::Remained),
 
                TK::PlusEquals          => Some(AO::Added),
 
                TK::MinusEquals         => Some(AO::Subtracted),
 
                TK::ShiftLeftEquals     => Some(AO::ShiftedLeft),
 
                TK::ShiftRightEquals    => Some(AO::ShiftedRight),
 
                TK::AndEquals           => Some(AO::BitwiseAnded),
 
                TK::CaretEquals         => Some(AO::BitwiseXored),
 
                TK::OrEquals            => Some(AO::BitwiseOred),
 
                _                       => None
 
            };
 
        }
 

	
 
        let expr = self.consume_conditional_expression(module, iter, ctx)?;
 
        if let Some(operation) = parse_assignment_operator(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let left = expr;
 
            let right = self.consume_expression(module, iter, ctx)?;
 

	
 
            Ok(ctx.heap.alloc_assignment_expression(|this| AssignmentExpression{
 
                this, span, left, operation, right,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast())
 
        } else {
 
            Ok(expr)
 
        }
 
    }
 

	
 
    fn consume_conditional_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        let result = self.consume_concat_expression(module, iter, ctx)?;
 
        if let Some(TokenKind::Question) = iter.next() {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let test = result;
 
            let true_expression = self.consume_expression(module, iter, ctx)?;
 
            consume_token(source, iter, TokenKind::Colon)?;
 
            let false_expression = self.consume_expression(module, iter, ctx)?;
 
            Ok(ctx.heap.alloc_conditional_expression(|this| ConditionalExpression{
 
                this, span, test, true_expression, false_expression,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast())
 
        } else {
 
            Ok(result)
 
        }
 
    }
 

	
 
    fn consume_concat_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::At) => Some(BinaryOperator::Concatenate),
 
                _ => None
 
            },
 
            Self::consume_logical_or_expression
 
        )
 
    }
 

	
 
    fn consume_logical_or_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::OrOr) => Some(BinaryOperator::LogicalOr),
 
                _ => None
 
            },
 
            Self::consume_logical_and_expression
 
        )
 
    }
 

	
 
    fn consume_logical_and_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::AndAnd) => Some(BinaryOperator::LogicalAnd),
 
                _ => None
 
            },
 
            Self::consume_bitwise_or_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_or_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Or) => Some(BinaryOperator::BitwiseOr),
 
                _ => None
 
            },
 
            Self::consume_bitwise_xor_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_xor_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Caret) => Some(BinaryOperator::BitwiseXor),
 
                _ => None
 
            },
 
            Self::consume_bitwise_and_expression
 
        )
 
    }
 

	
 
    fn consume_bitwise_and_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::And) => Some(BinaryOperator::BitwiseAnd),
 
                _ => None
 
            },
 
            Self::consume_equality_expression
 
        )
 
    }
 

	
 
    fn consume_equality_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::EqualEqual) => Some(BinaryOperator::Equality),
 
                Some(TokenKind::NotEqual) => Some(BinaryOperator::Inequality),
 
                _ => None
 
            },
 
            Self::consume_relational_expression
 
        )
 
    }
 

	
 
    fn consume_relational_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::OpenAngle) => Some(BinaryOperator::LessThan),
 
                Some(TokenKind::CloseAngle) => Some(BinaryOperator::GreaterThan),
 
                Some(TokenKind::LessEquals) => Some(BinaryOperator::LessThanEqual),
 
                Some(TokenKind::GreaterEquals) => Some(BinaryOperator::GreaterThanEqual),
 
                _ => None
 
            },
 
            Self::consume_shift_expression
 
        )
 
    }
 

	
 
    fn consume_shift_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::ShiftLeft) => Some(BinaryOperator::ShiftLeft),
 
                Some(TokenKind::ShiftRight) => Some(BinaryOperator::ShiftRight),
 
                _ => None
 
            },
 
            Self::consume_add_or_subtract_expression
 
        )
 
    }
 

	
 
    fn consume_add_or_subtract_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Plus) => Some(BinaryOperator::Add),
 
                Some(TokenKind::Minus) => Some(BinaryOperator::Subtract),
 
                _ => None,
 
            },
 
            Self::consume_multiply_divide_or_modulus_expression
 
        )
 
    }
 

	
 
    fn consume_multiply_divide_or_modulus_expression(
 
        &mut self, module: &Module, iter: &mut Tokeniter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        self.consume_generic_binary_expression(
 
            module, iter, ctx,
 
            |token| match token {
 
                Some(TokenKind::Star) => Some(BinaryOperator::Multiply),
 
                Some(TokenKind::Slash) => Some(BinaryOperator::Divide),
 
                Some(TokenKind::Percent) => Some(BinaryOperator::Remainder),
 
                _ => None
 
            },
 
            Self::consume_prefix_expression
 
        )
 
    }
 

	
 
    fn consume_prefix_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        fn parse_prefix_token(token: Option<TokenKind>) -> Some(UnaryOperation) {
 
            use TokenKind as TK;
 
            use UnaryOperation as UO;
 
            match token {
 
                Some(TK::Plus) => Some(UO::Positive),
 
                Some(TK::Minus) => Some(UO::Negative),
 
                Some(TK::PlusPlus) => Some(UO::PreIncrement),
 
                Some(TK::MinusMinus) => Some(UO::PreDecrement),
 
                Some(TK::Tilde) => Some(UO::BitwiseNot),
 
                Some(TK::Exclamation) => Some(UO::LogicalNot),
 
                _ => None
 
            }
 
        }
 

	
 
        if let Some(operation) = parse_prefix_token(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let expression = self.consume_prefix_expression(module, iter, ctx)?;
 
            Ok(ctx.heap.alloc_unary_expression(|this| UnaryExpression {
 
                this, span, operation, expression,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default()
 
            }).upcast())
 
        } else {
 
            self.consume_postfix_expression(module, iter, ctx)
 
        }
 
    }
 

	
 
    fn consume_postfix_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        fn has_matching_postfix_token(token: Option<TokenKind>) -> bool {
 
            use TokenKind as TK;
 

	
 
            if token.is_none() { return false; }
 
            match token.unwrap() {
 
                TK::PlusPlus | TK::MinusMinus | TK::OpenSquare | TK::Dot => true,
 
                _ => false
 
            }
 
        }
 

	
 
        let mut result = self.consume_primary_expression(module, iter, ctx)?;
 
        let mut next = iter.next();
 
        while has_matching_postfix_token(next) {
 
            let token = next.unwrap();
 
            let mut span = iter.next_span();
 
            iter.consume();
 

	
 
            if token == TokenKind::PlusPlus {
 
                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
 
                    this, span,
 
                    operation: UnaryOperation::PostIncrement,
 
                    expression: result,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: ConcreteType::default()
 
                }).upcast();
 
            } else if token == TokenKind::MinusMinus {
 
                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
 
                    this, span,
 
                    operation: UnaryOperation::PostDecrement,
 
                    expression: result,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: ConcreteType::default()
 
                }).upcast();
 
            } else if token == TokenKind::OpenSquare {
 
                let subject = result;
 
                let from_index = self.consume_expression(module, iter, ctx)?;
 

	
 
                // Check if we have an indexing or slicing operation
 
                next = iter.next();
 
                if Some(TokenKind::DotDot) = next {
 
                    iter.consume();
 

	
 
                    let to_index = self.consume_expression(module, iter, ctx)?;
 
                    let end_span = consume_token(&module.source, iter, TokenKind::CloseSquare)?;
 
                    span.end = end_span.end;
 

	
 
                    result = ctx.heap.alloc_slicing_expression(|this| SlicingExpression{
 
                        this, span, subject, from_index, to_index,
 
                        parent: ExpressionParent::None,
 
                        concrete_type: ConcreteType::default()
 
                    }).upcast();
 
                } else if Some(TokenKind::CloseSquare) {
 
                    let end_span = consume_token(&module.source, iter, TokenKind::CloseSquare)?;
 
                    span.end = end_span.end;
 

	
 
                    result = ctx.heap.alloc_indexing_expression(|this| IndexingExpression{
 
                        this, span, subject,
 
                        index: from_index,
 
                        parent: ExpressionParent::None,
 
                        concrete_type: ConcreteType::default()
 
                    }).upcast();
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        &module.source, iter.last_valid_pos(), "unexpected token: expected ']' or '..'"
 
                    ));
 
                }
 
            } else {
 
                debug_assert_eq!(token, TokenKind::Dot);
 
                let subject = result;
 
                let (field_text, field_span) = consume_ident(&module.source, iter)?;
 
                let field = if field_text == b"length" {
 
                    Field::Length
 
                } else {
 
                    let value = ctx.pool.intern(field_text);
 
                    let identifier = Identifier{ value, span: field_span };
 
                    Field::Symbolic(FieldSymbolic{ identifier, definition: None, field_idx: 0 });
 
                };
 

	
 
                result = ctx.heap.alloc_select_expression(|this| SelectExpression{
 
                    this, span, subject, field,
 
                    parent: ExpressionParent::None,
 
                    concrete_type: ConcreteType::default()
 
                }).upcast();
 
            }
 

	
 
            next = iter.next();
 
        }
 

	
 
        Ok(result)
 
    }
 

	
 
    fn consume_primary_expression(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<ExpressionId, ParseError> {
 
        let next = iter.next();
 

	
 
        let result;
 
        if next == Some(TokenKind::OpenParen) {
 
            // Expression between parentheses
 
            iter.consume();
 
            result = self.consume_expression(module, iter, ctx)?;
 
            consume_token(&module.source, iter, TokenKind::CloseParen)?;
 
        } else if next == Some(TokenKind::OpenCurly) {
 
            // Array literal
 
            let (start_pos, mut end_pos) = iter.next_positions();
 
            let mut expressions = Vec::new();
 
            consume_comma_separated(
 
                TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter,
 
                |source, iter| self.consume_expression(module, iter, ctx),
 
                &mut expressions, "an expression", "a list of expressions", Some(&mut end_pos)
 
            )?;
 

	
 
            // TODO: Turn into literal
 
            result = ctx.heap.alloc_array_expression(|this| ArrayExpression{
 
                this,
 
                span: InputSpan::from_positions(start_pos, end_pos),
 
                elements: expressions,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Integer) {
 
            let (literal, span) = consume_integer_literal(&module.source, iter, &mut self.buffer)?;
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::Integer(LiteralInteger{ unsigned_value: literal, negated: false }),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::String) {
 
            let (text, span) = consume_string_literal(&module.source, iter, &mut self.buffer)?;
 
            let span = consume_string_literal(&module.source, iter, &mut self.buffer)?;
 
            let interned = ctx.pool.intern(self.buffer.as_bytes());
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::String(interned),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Character) {
 

	
 
            let (character, span) = consume_character_literal(&module.source, iter)?;
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::Character(character),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Ident) {
 
            // May be a variable, a type instantiation or a function call. If we
 
            // have a single identifier that we cannot find in the type table
 
            // then we're going to assume that we're dealign with a variable.
 
        }
 

	
 
        Ok(result)
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Expression Utilities
 
    //--------------------------------------------------------------------------
 

	
 
    #[inline]
 
    fn consume_generic_binary_expression<
 
        M: Fn(Option<TokenKind>) -> Option<BinaryOperator>,
 
        F: Fn(&mut PassDefinitions, &Module, &mut TokenIter, &mut PassCtx) -> Result<ExpressionId, ParseError>
 
    >(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, match_fn: M, higher_precedence_fn: F
 
    ) -> Result<ExpressionId, ParseError> {
 
        let mut result = higher_precedence_fn(self, module, iter, ctx)?;
 
        while let Some(operation) = match_fn(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let left = result;
 
            let right = higher_precedence_fn(self, module, iter, ctx)?;
 

	
 
            result = ctx.heap.alloc_binary_expression(|this| BinaryExpression{
 
                this, span, left, operation, right,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default()
 
            }).upcast();
 
        }
 

	
 
        Ok(result)
 
    }
 
}
 

	
 
/// Consumes a type. A type always starts with an identifier which may indicate
 
/// a builtin type or a user-defined type. The fact that it may contain
 
/// polymorphic arguments makes it a tree-like structure. Because we cannot rely
 
/// on knowing the exact number of polymorphic arguments we do not check for
 
/// these.
 
// TODO: @Optimize, and fix spans if needed
 
fn consume_parser_type(
 
    source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
    cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool
 
) -> Result<ParserType, ParseError> {
 
    struct Entry{
 
        element: ParserTypeElement,
 
        depth: i32,
 
    }
 

	
 
    fn insert_array_before(elements: &mut Vec<Entry>, depth: i32, span: InputSpan) {
 
        let index = elements.iter().rposition(|e| e.depth == depth).unwrap();
 
        elements.insert(index, Entry{
 
            element: ParserTypeElement{ full_span: span, variant: ParserTypeVariant::Array },
 
            depth,
 
        });
 
    }
 

	
 
    // Most common case we just have one type, perhaps with some array
 
    // annotations.
 
    let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?;
 
    if iter.next() != Some(TokenKind::OpenAngle) {
 
        let mut num_array = 0;
 
        while iter.next() == Some(TokenKind::OpenSquare) {
 
            iter.consume();
 
            consume_token(source, iter, TokenKind::CloseSquare)?;
 
            num_array += 1;
 
        }
 

	
 
        let array_span = element.full_span;
 
        let mut elements = Vec::with_capacity(num_array + 1);
 
        for _ in 0..num_array {
 
            elements.push(ParserTypeElement{ full_span: array_span, variant: ParserTypeVariant::Array });
 
        }
 
        elements.push(element);
 

	
 
        return Ok(ParserType{ elements });
 
    };
 

	
 
    // We have a polymorphic specification. So we start by pushing the item onto
 
    // our stack, then start adding entries together with the angle-brace depth
 
    // at which they're found.
 
    let mut elements = Vec::new();
 
    elements.push(Entry{ element, depth: 0 });
 

	
 
    // Start out with the first '<' consumed.
 
    iter.consume();
 
    enum State { Ident, Open, Close, Comma };
 
    let mut state = State::Open;
 
    let mut angle_depth = 1;
 

	
 
    loop {
 
        let next = iter.next();
 

	
 
        match state {
 
            State::Ident => {
 
                // Just parsed an identifier, may expect comma, angled braces,
 
                // or the tokens indicating an array
 
                if Some(TokenKind::OpenAngle) == next {
 
                    angle_depth += 1;
 
                    state = State::Open;
 
                } else if Some(TokenKind::CloseAngle) == next {
 
                    angle_depth -= 1;
 
                    state = State::Close;
 
                } else if Some(TokenKind::ShiftRight) == next {
 
                    angle_depth -= 2;
 
                    state = State::Close;
 
                } else if Some(TokenKind::Comma) == next {
 
                    state = State::Comma;
 
                } else if Some(TokenKind::OpenSquare) == next {
 
                    let (start_pos, _) = iter.next_positions();
 
                    iter.consume(); // consume opening square
 
                    if iter.next() != Some(TokenKind::CloseSquare) {
 
                        return Err(ParseError::new_error_str_at_pos(
 
                            source, iter.last_valid_pos(),
 
                            "unexpected token: expected ']'"
 
                        ));
 
                    }
 
                    let (_, end_pos) = iter.next_positions();
 
                    let array_span = InputSpan::from_positions(start_pos, end_pos);
 
                    insert_array_before(&mut elements, angle_depth, array_span);
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        source, iter.last_valid_pos(),
 
                        "unexpected token: expected '<', '>', ',' or '['")
 
                    );
 
                }
 

	
 
                iter.consume();
 
            },
 
            State::Open => {
 
                // Just parsed an opening angle bracket, expecting an identifier
 
                let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?;
 
                elements.push(Entry{ element, depth: angle_depth });
 
                state = State::Ident;
 
            },
 
            State::Close => {
 
                // Just parsed 1 or 2 closing angle brackets, expecting comma,
 
                // more closing brackets or the tokens indicating an array
 
                if Some(TokenKind::Comma) == next {
 
                    state = State::Comma;
 
                } else if Some(TokenKind::CloseAngle) == next {
 
                    angle_depth -= 1;
 
                    state = State::Close;
 
                } else if Some(TokenKind::ShiftRight) == next {
 
                    angle_depth -= 2;
 
                    state = State::Close;
 
                } else if Some(TokenKind::OpenSquare) == next {
 
                    let (start_pos, _) = iter.next_positions();
 
                    iter.consume();
 
                    if iter.next() != Some(TokenKind::CloseSquare) {
 
                        return Err(ParseError::new_error_str_at_pos(
 
                            source, iter.last_valid_pos(),
 
                            "unexpected token: expected ']'"
 
                        ));
 
                    }
 
                    let (_, end_pos) = iter.next_positions();
 
                    let array_span = InputSpan::from_positions(start_pos, end_pos);
 
                    insert_array_before(&mut elements, angle_depth, array_span);
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        source, iter.last_valid_pos(),
 
                        "unexpected token: expected ',', '>', or '['")
 
                    );
 
                }
 

	
 
                iter.consume();
 
            },
 
            State::Comma => {
 
                // Just parsed a comma, expecting an identifier or more closing
 
                // braces
 
                if Some(TokenKind::Ident) == next {
 
                    let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?;
 
                    elements.push(Entry{ element, depth: angle_depth });
 
                    state = State::Ident;
 
                } else if Some(TokenKind::CloseAngle) == next {
 
                    iter.consume();
 
                    angle_depth -= 1;
 
                    state = State::Close;
 
                } else if Some(TokenKind::ShiftRight) == next {
 
                    iter.consume();
 
                    angle_depth -= 2;
 
                    state = State::Close;
 
                } else {
 
                    return Err(ParseError::new_error_str_at_pos(
 
                        source, iter.last_valid_pos(),
 
                        "unexpected token: expected '>' or a type name"
 
                    ));
 
                }
 
            }
 
        }
 

	
 
        if angle_depth < 0 {
 
            return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "unmatched '>'"));
 
        } else if angle_depth == 0 {
 
            break;
 
        }
 
    }
 

	
 
    // If here then we found the correct number of angle braces. But we still
 
    // need to make sure that each encountered type has the correct number of
 
    // embedded types.
 
    let mut idx = 0;
 
    while idx < elements.len() {
 
        let cur_element = &elements[idx];
 
        let expected_subtypes = cur_element.element.variant.num_embedded();
 
        let mut encountered_subtypes = 0;
 
        for peek_idx in idx + 1..elements.len() {
 
            let peek_element = &elements[peek_idx];
 
            if peek_element.depth == cur_element.depth + 1 {
 
                encountered_subtypes += 1;
 
            } else if peek_element.depth <= cur_element.depth {
 
                break;
 
            }
 
        }
 

	
 
        if expected_subtypes != encountered_subtypes {
 
            if encountered_subtypes == 0 {
 
                // Case where we have elided the embedded types, all of them
 
                // should be inferred.
 
                if !allow_inference {
 
                    return Err(ParseError::new_error_str_at_span(
 
                        source, cur_element.element.full_span,
 
                        "type inference is not allowed here"
 
                    ));
 
                }
 

	
 
                // Insert the missing types
 
                let inserted_span = cur_element.element.full_span;
 
                let inserted_depth = cur_element.depth + 1;
 
                elements.reserve(expected_subtypes);
 
                for _ in 0..expected_subtypes {
 
                    elements.insert(idx + 1, Entry{
 
                        element: ParserTypeElement{ full_span: inserted_span, variant: ParserTypeVariant::Inferred },
 
                        depth: inserted_depth,
 
                    });
 
                }
 
            } else {
 
                // Mismatch in number of embedded types
 
                let expected_args_text = if expected_subtypes == 1 {
 
                    "polymorphic argument"
 
                } else {
 
                    "polymorphic arguments"
 
                };
 

	
 
                let maybe_infer_text = if allow_inference {
 
                    " (or none, to perform implicit type inference)"
 
                } else {
 
                    ""
 
                };
 

	
 
                return Err(ParseError::new_error_at_span(
 
                    source, cur_element.element.full_span,
 
                    format!(
 
                        "expected {} {}{}, but {} were provided",
 
                        expected_subtypes, expected_args_text, maybe_infer_text, encountered_subtypes
 
                    )
 
                ));
 
            }
 
        }
 

	
 
        idx += 1;
 
    }
 

	
 
    let mut constructed_elements = Vec::with_capacity(elements.len());
 
    for element in elements.into_iter() {
 
        constructed_elements.push(element.element);
 
    }
 

	
 
    Ok(ParserType{ elements: constructed_elements })
 
}
 

	
 
fn consume_parser_type_ident(
 
    source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier],
 
    mut scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool,
 
) -> Result<ParserTypeElement, ParseError> {
 
    use ParserTypeVariant as PTV;
 
    let (mut type_text, mut type_span) = consume_any_ident(source, iter)?;
 

	
 
    let variant = match type_text {
 
        KW_TYPE_MESSAGE => PTV::Message,
 
        KW_TYPE_BOOL => PTV::Bool,
 
        KW_TYPE_UINT8 => PTV::UInt8,
 
        KW_TYPE_UINT16 => PTV::UInt16,
 
        KW_TYPE_UINT32 => PTV::UInt32,
 
        KW_TYPE_UINT64 => PTV::UInt64,
 
        KW_TYPE_SINT8 => PTV::SInt8,
 
        KW_TYPE_SINT16 => PTV::SInt16,
 
        KW_TYPE_SINT32 => PTV::SInt32,
 
        KW_TYPE_SINT64 => PTV::SInt64,
 
        KW_TYPE_IN_PORT => PTV::Input,
 
        KW_TYPE_OUT_PORT => PTV::Output,
 
        KW_TYPE_CHAR => PTV::Character,
 
        KW_TYPE_STRING => PTV::String,
 
        KW_TYPE_INFERRED => {
 
            if !allow_inference {
 
                return Err(ParseError::new_error_str_at_span(source, type_span, "type inference is not allowed here"));
 
            }
 

	
 
            PTV::Inferred
 
        },
 
        _ => {
 
            // Must be some kind of symbolic type
 
            let mut type_kind = None;
 
            for (poly_idx, poly_var) in poly_vars.iter().enumerate() {
 
                if poly_var.value.as_bytes() == type_text {
 
                    type_kind = Some(PTV::PolymorphicArgument(wrapping_definition, poly_idx));
 
                }
 
            }
 

	
 
            if type_kind.is_none() {
 
                // Check symbol table for definition. To be fair, the language
 
                // only allows a single namespace for now. That said:
 
                let last_symbol = symbols.get_symbol_by_name(scope, type_text);
 
                if last_symbol.is_none() {
 
                    return Err(ParseError::new_error_str_at_span(source, type_span, "unknown type"));
 
                }
 
                let mut last_symbol = last_symbol.unwrap();
 

	
 
                loop {
 
                    match &last_symbol.variant {
 
                        SymbolVariant::Module(symbol_module) => {
 
                            // Expecting more identifiers
 
                            if Some(TokenKind::ColonColon) != iter.next() {
 
                                return Err(ParseError::new_error_str_at_span(source, type_span, "expected type but got module"));
 
                            }
 

	
 
                            consume_token(source, iter, TokenKind::ColonColon)?;
 

	
 
                            // Consume next part of type and prepare for next
 
                            // lookup loop
 
                            let (next_text, next_span) = consume_any_ident(source, iter)?;
 
                            let old_text = type_text;
 
                            type_text = next_text;
 
                            type_span.end = next_span.end;
 
                            scope = SymbolScope::Module(symbol_module.root_id);
 

	
 
                            let new_symbol = symbols.get_symbol_by_name_defined_in_scope(scope, type_text);
 
                            if new_symbol.is_none() {
 
                                return Err(ParseError::new_error_at_span(
 
                                    source, next_span,
 
                                    format!(
 
                                        "unknown type '{}' in module '{}'",
 
                                        String::from_utf8_lossy(type_text),
 
                                        String::from_utf8_lossy(old_text)
 
                                    )
 
                                ));
 
                            }
 

	
 
                            last_symbol = new_symbol.unwrap();
 
                        },
 
                        SymbolVariant::Definition(symbol_definition) => {
 
                            let num_poly_vars = heap[symbol_definition.definition_id].poly_vars().len();
 
                            type_kind = Some(PTV::Definition(symbol_definition.definition_id, num_poly_vars));
 
                            break;
 
                        }
 
                    }
 
                }
 
            }
 

	
 
            debug_assert!(type_kind.is_some());
 
            type_kind.unwrap()
 
        },
 
    };
 

	
 
    Ok(ParserTypeElement{ full_span: type_span, variant })
 
}
 

	
 
/// Consumes polymorphic variables and throws them on the floor.
 
fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter) -> Result<(), ParseError> {
 
    maybe_consume_comma_separated_spilled(
 
        TokenKind::OpenAngle, TokenKind::CloseAngle, source, iter,
 
        |source, iter| {
 
            consume_ident(source, iter)?;
 
            Ok(())
 
        }, "a polymorphic variable"
 
    )?;
 
    Ok(())
 
}
 

	
 
/// Consumes the parameter list to functions/components
 
fn consume_parameter_list(
 
    source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut Vec<ParameterId>,
 
    poly_vars: &[Identifier], scope: SymbolScope, definition_id: DefinitionId
 
) -> Result<(), ParseError> {
 
    consume_comma_separated(
 
        TokenKind::OpenParen, TokenKind::CloseParen, source, iter,
 
        |source, iter| {
 
            let (start_pos, _) = iter.next_positions();
 
            let parser_type = consume_parser_type(
 
                source, iter, &ctx.symbols, &ctx.heap, poly_vars, scope, definition_id, false
 
            )?;
 
            let identifier = consume_ident_interned(source, iter, ctx)?;
 
            let parameter_id = ctx.heap.alloc_parameter(|this| Parameter{
 
                this,
 
                span: InputSpan::from_positions(start_pos, identifier.span.end),
 
                parser_type,
 
                identifier
 
            });
 
            Ok(parameter_id)
 
        },
 
        target, "a parameter", "a parameter list", None
 
    )
 
}
 
\ No newline at end of file
src/protocol/parser/token_parsing.rs
Show inline comments
 
use crate::collections::StringRef;
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source2::{
 
    InputSource2 as InputSource,
 
    InputPosition2 as InputPosition,
 
    InputSpan,
 
    ParseError,
 
};
 
use super::tokens::*;
 
use super::symbol_table2::*;
 
use super::{Module, ModuleCompilationPhase, PassCtx};
 

	
 
// Keywords
 
pub(crate) const KW_LET:       &'static [u8] = b"let";
 
pub(crate) const KW_AS:        &'static [u8] = b"as";
 
pub(crate) const KW_STRUCT:    &'static [u8] = b"struct";
 
pub(crate) const KW_ENUM:      &'static [u8] = b"enum";
 
pub(crate) const KW_UNION:     &'static [u8] = b"union";
 
pub(crate) const KW_FUNCTION:  &'static [u8] = b"function";
 
pub(crate) const KW_PRIMITIVE: &'static [u8] = b"primitive";
 
pub(crate) const KW_COMPOSITE: &'static [u8] = b"composite";
 
pub(crate) const KW_IMPORT:    &'static [u8] = b"import";
 

	
 
// Keywords - literals
 
pub(crate) const KW_LIT_TRUE:  &'static [u8] = b"true";
 
pub(crate) const KW_LIT_FALSE: &'static [u8] = b"false";
 
pub(crate) const KW_LIT_NULL:  &'static [u8] = b"null";
 

	
 
// Keywords - functions
 
pub(crate) const KW_FUNC_GET:    &'static [u8] = b"get";
 
pub(crate) const KW_FUNC_PUT:    &'static [u8] = b"put";
 
pub(crate) const KW_FUNC_FIRES:  &'static [u8] = b"fires";
 
pub(crate) const KW_FUNC_CREATE: &'static [u8] = b"create";
 
pub(crate) const KW_FUNC_LENGTH: &'static [u8] = b"length";
 

	
 
// Keywords - statements
 
pub(crate) const KW_STMT_CHANNEL:  &'static [u8] = b"channel";
 
pub(crate) const KW_STMT_IF:       &'static [u8] = b"if";
 
pub(crate) const KW_STMT_WHILE:    &'static [u8] = b"while";
 
pub(crate) const KW_STMT_BREAK:    &'static [u8] = b"break";
 
pub(crate) const KW_STMT_CONTINUE: &'static [u8] = b"continue";
 
pub(crate) const KW_STMT_GOTO:     &'static [u8] = b"goto";
 
pub(crate) const KW_STMT_RETURN:   &'static [u8] = b"return";
 
pub(crate) const KW_STMT_SYNC:     &'static [u8] = b"synchronous";
 
pub(crate) const KW_STMT_ASSERT:   &'static [u8] = b"assert";
 
pub(crate) const KW_STMT_NEW:      &'static [u8] = b"new";
 

	
 
// Keywords - types
 
pub(crate) const KW_TYPE_IN_PORT:  &'static [u8] = b"in";
 
pub(crate) const KW_TYPE_OUT_PORT: &'static [u8] = b"out";
 
pub(crate) const KW_TYPE_MESSAGE:  &'static [u8] = b"msg";
 
pub(crate) const KW_TYPE_BOOL:     &'static [u8] = b"bool";
 
pub(crate) const KW_TYPE_UINT8:    &'static [u8] = b"u8";
 
pub(crate) const KW_TYPE_UINT16:   &'static [u8] = b"u16";
 
pub(crate) const KW_TYPE_UINT32:   &'static [u8] = b"u32";
 
pub(crate) const KW_TYPE_UINT64:   &'static [u8] = b"u64";
 
pub(crate) const KW_TYPE_SINT8:    &'static [u8] = b"s8";
 
pub(crate) const KW_TYPE_SINT16:   &'static [u8] = b"s16";
 
pub(crate) const KW_TYPE_SINT32:   &'static [u8] = b"s32";
 
pub(crate) const KW_TYPE_SINT64:   &'static [u8] = b"s64";
 
pub(crate) const KW_TYPE_CHAR:     &'static [u8] = b"char";
 
pub(crate) const KW_TYPE_STRING:   &'static [u8] = b"string";
 
pub(crate) const KW_TYPE_INFERRED: &'static [u8] = b"auto";
 

	
 
/// Consumes a domain-name identifier: identifiers separated by a dot. For
 
/// simplification of later parsing and span identification the domain-name may
 
/// contain whitespace, but must reside on the same line.
 
pub(crate) fn consume_domain_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (_, mut span) = consume_ident(source, iter)?;
 
    while let Some(TokenKind::Dot) = iter.next() {
 
        iter.consume();
 
        let (_, new_span) = consume_ident(source, iter)?;
 
        span.end = new_span.end;
 
    }
 

	
 
    // Not strictly necessary, but probably a reasonable restriction: this
 
    // simplifies parsing of module naming and imports.
 
    if span.begin.line != span.end.line {
 
        return Err(ParseError::new_error_str_at_span(source, span, "module names may not span multiple lines"));
 
    }
 

	
 
    // If module name consists of a single identifier, then it may not match any
 
    // of the reserved keywords
 
    let section = source.section_at_pos(span.begin, span.end);
 
    if is_reserved_keyword(section) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((source.section_at_pos(span.begin, span.end), span))
 
}
 

	
 
/// Consumes a specific expected token. Be careful to only call this with tokens
 
/// that do not have a variable length.
 
pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<InputSpan, ParseError> {
 
    if Some(expected) != iter.next() {
 
        return Err(ParseError::new_error_at_pos(
 
            source, iter.last_valid_pos(),
 
            format!("expected '{}'", expected.token_chars())
 
        ));
 
    }
 
    let span = iter.next_span();
 
    iter.consume();
 
    Ok(span)
 
}
 

	
 
/// Consumes a comma-separated list of items if the opening delimiting token is
 
/// encountered. If not, then the iterator will remain at its current position.
 
/// Note that the potential cases may be:
 
/// - No opening delimiter encountered, then we return `false`.
 
/// - Both opening and closing delimiter encountered, but no items.
 
/// - Opening and closing delimiter encountered, and items were processed.
 
/// - Found an opening delimiter, but processing an item failed.
 
pub(crate) fn maybe_consume_comma_separated<T, F>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter,
 
    consumer_fn: F, target: &mut Vec<T>, item_name_and_article: &'static str,
 
    close_pos: Option<&mut InputPosition>
 
) -> Result<bool, ParseError>
 
    where F: Fn(&InputSource, &mut TokenIter) -> Result<T, ParseError>
 
{
 
    let mut next = iter.next();
 
    if Some(open_delim) != next {
 
        return Ok(false);
 
    }
 

	
 
    // Opening delimiter encountered, so must parse the comma-separated list.
 
    iter.consume();
 
    target.clear();
 
    let mut had_comma = true;
 
    loop {
 
        next = iter.next();
 
        if Some(close_delim) == next {
 
            if let Some(close_pos) = close_pos {
 
                // If requested return the position of the closing delimiter
 
                let (_, new_close_pos) = iter.next_positions();
 
                *close_pos = new_close_pos;
 
            }
 
            iter.consume();
 
            break;
 
        } else if !had_comma || next.is_none() {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        let new_item = consumer_fn(source, iter)?;
 
        target.push(new_item);
 

	
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(true)
 
}
 

	
 
pub(crate) fn maybe_consume_comma_separated_spilled<F: Fn(&InputSource, &mut TokenIter) -> Result<(), ParseError>>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter,
 
    consumer_fn: F, item_name_and_article: &'static str
 
) -> Result<bool, ParseError> {
 
    let mut next = iter.next();
 
    if Some(open_delim) != next {
 
        return Ok(false);
 
    }
 

	
 
    iter.consume();
 
    let mut had_comma = true;
 
    loop {
 
        next = iter.next();
 
        if Some(close_delim) == next {
 
            iter.consume();
 
            break;
 
        } else if !had_comma {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        consumer_fn(source, iter)?;
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(true)
 
}
 

	
 
/// Consumes a comma-separated list and expected the opening and closing
 
/// characters to be present. The returned array may still be empty
 
pub(crate) fn consume_comma_separated<T, F>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter,
 
    consumer_fn: F, target: &mut Vec<T>, item_name_and_article: &'static str,
 
    list_name_and_article: &'static str, close_pos: Option<&mut InputPosition>
 
) -> Result<(), ParseError>
 
    where F: Fn(&InputSource, &mut TokenIter) -> Result<T, ParseError>
 
{
 
    let first_pos = iter.last_valid_pos();
 
    match maybe_consume_comma_separated(
 
        open_delim, close_delim, source, iter, consumer_fn, target,
 
        item_name_and_article, close_pos
 
    ) {
 
        Ok(true) => Ok(()),
 
        Ok(false) => {
 
            return Err(ParseError::new_error_at_pos(
 
                source, first_pos,
 
                format!("expected {}", list_name_and_article)
 
            ));
 
        },
 
        Err(err) => Err(err)
 
    }
 
}
 

	
 
/// Consumes an integer literal, may be binary, octal, hexadecimal or decimal,
 
/// and may have separating '_'-characters.
 
pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
 
    if Some(TokenKind::Integer) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
 
    }
 
    let integer_span = iter.next_span();
 
    iter.consume();
 

	
 
    let integer_text = source.section_at_span(integer_span);
 

	
 
    // Determine radix and offset from prefix
 
    let (radix, input_offset, radix_name) =
 
        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
 
            // Binary number
 
            (2, 2, "binary")
 
        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
 
            // Octal number
 
            (8, 2, "octal")
 
        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
 
            // Hexadecimal number
 
            (16, 2, "hexadecimal")
 
        } else {
 
            (10, 0, "decimal")
 
        };
 

	
 
    // Take out any of the separating '_' characters
 
    buffer.clear();
 
    for char_idx in input_offset..integer_text.len() {
 
        let char = integer_text[char_idx];
 
        if char == b'_' {
 
            continue;
 
        }
 
        if !char.is_ascii_digit() {
 
            return Err(ParseError::new_error_at_span(
 
                source, integer_span,
 
                format!("incorrectly formatted {} number", radix_name)
 
            ));
 
        }
 
        buffer.push(char::from(char));
 
    }
 

	
 
    // Use the cleaned up string to convert to integer
 
    match u64::from_str_radix(&buffer, radix) {
 
        Ok(number) => Ok((number, integer_span)),
 
        Err(_) => Err(ParseError::new_error_at_span(
 
            source, integer_span,
 
            format!("incorrectly formatted {} number", radix_name)
 
        )),
 
    }
 
}
 

	
 
/// Consumes a character literal. We currently support a limited number of
 
/// backslash-escaped characters
 
pub(crate) fn consume_character_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<char, ParseError> {
 
pub(crate) fn consume_character_literal(
 
    source: &InputSource, iter: &mut TokenIter
 
) -> Result<(char, InputSpan), ParseError> {
 
    if Some(TokenKind::Character) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal"));
 
    }
 
    let char_span = iter.next_span();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let char_text = source.section_at_span(char_span);
 
    let char_text = source.section_at_span(span);
 
    if !char_text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(
 
            source, span, "expected an ASCII character literal"
 
        ));
 
    }
 

	
 
    //
 
    match char_text.len() {
 
        0 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 
        1 => {
 
            // We already know the text is ascii, so just throw an error if we have the escape
 
            // character.
 
            if char_text[0] == b'\\' {
 
                return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
 
            }
 
            return Ok((char_text[0] as char, span));
 
        },
 
        2 => {
 
            if char_text[0] == b'\\' {
 
                let result = parse_escaped_character(char_text[1])?;
 
                return Ok((result, span))
 
            }
 
        },
 
        _ => {}
 
    }
 

	
 
    return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
 
}
 

	
 
/// Consumes a string literal. We currently support a limited number of
 
/// backslash-escaped characters.
 
pub(crate) fn consume_string_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(>
 
/// backslash-escaped characters. Note that the result is stored in the
 
/// buffer.
 
pub(crate) fn consume_string_literal(
 
    source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 
) -> Result<InputSpan, ParseError> {
 
    if Some(TokenKind::String) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
 
    }
 

	
 
    buffer.clear();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let text = source.section_at_span(span);
 
    if !text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
 
    }
 
    buffer.reserve(text.len());
 

	
 
    let mut was_escape = false;
 
    for idx in 0..text.len() {
 
        let cur = text[idx];
 
        if cur != b'\\' {
 
            if was_escape {
 
                let to_push = parse_escaped_character(cur)?;
 
                buffer.push(to_push);
 
            } else {
 
                buffer.push(cur as char);
 
            }
 
            was_escape = false;
 
        } else {
 
            was_escape = true;
 
        }
 
    }
 

	
 
    Ok(span)
 
}
 

	
 
fn parse_escaped_character(v: u8) -> Result<char, ParseError> {
 
    let result = match v {
 
        b'r' => '\r',
 
        b'n' => '\n',
 
        b't' => '\t',
 
        b'0' => '\0',
 
        b'\\' => '\\',
 
        b'\'' => '\'',
 
        b'"' => '"',
 
        v => return Err(ParseError::new_error_at_span(
 
            source, span, format!("unexpected escaped character '{}'", v)
 
        )),
 
    };
 
    Ok(result)
 
}
 

	
 
pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    peek_ident(source, iter).map_or(false, |section| section == expected)
 
}
 

	
 
pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_positions();
 
        return Some(source.section_at_pos(start, end))
 
    }
 

	
 
    None
 
}
 

	
 
/// Consumes any identifier and returns it together with its span. Does not
 
/// check if the identifier is a reserved keyword.
 
pub(crate) fn consume_any_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
 
}
 

	
 
/// Consumes a specific identifier. May or may not be a reserved keyword.
 
pub(crate) fn consume_exact_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> Result<InputSpan, ParseError> {
 
    let (ident, pos) = consume_any_ident(source, iter)?;
 
    if ident != expected {
 
        debug_assert!(expected.is_ascii());
 
        return Err(ParseError::new_error_at_pos(
 
            source, iter.last_valid_pos(),
 
            format!("expected the text '{}'", &String::from_utf8_lossy(expected))
 
        ));
 
    }
 
    Ok(pos)
 
}
 

	
 
/// Consumes an identifier that is not a reserved keyword and returns it
 
/// together with its span.
 
pub(crate) fn consume_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (ident, span) = consume_any_ident(source, iter)?;
 
    if is_reserved_keyword(ident) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((ident, span))
 
}
 

	
 
/// Consumes an identifier and immediately intern it into the `StringPool`
 
pub(crate) fn consume_ident_interned(
 
    source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx
 
) -> Result<Identifier, ParseError> {
 
    let (value, span) = consume_ident(source, iter)?;
 
    let value = ctx.pool.intern(value);
 
    Ok(Identifier{ span, value })
 
}
 

	
 
fn is_reserved_definition_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_STRUCT | KW_ENUM | KW_UNION | KW_FUNCTION | KW_PRIMITIVE | KW_COMPOSITE => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_statement_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_IMPORT | KW_AS |
 
        KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
 
        KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
 
        KW_STMT_SYNC | KW_STMT_ASSERT | KW_STMT_NEW => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_expression_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_LET |
 
        KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
 
        KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_LENGTH => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_type_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
 
        KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
 
        KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
 
        KW_TYPE_CHAR | KW_TYPE_STRING |
 
        KW_TYPE_INFERRED => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_keyword(text: &[u8]) -> bool {
 
    return
 
        is_reserved_definition_keyword(text) ||
 
        is_reserved_statement_keyword(text) ||
 
        is_reserved_expression_keyword(text) ||
 
        is_reserved_type_keyword(text);
 
}
 

	
 
pub(crate) fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
 
    for module in modules {
 
        if module.root_id == root_id {
 
            return Some(module)
 
        }
 
    }
 

	
 
    return None
 
}
 

	
 
/// Constructs a human-readable message indicating why there is a conflict of
 
/// symbols.
 
// Note: passing the `module_idx` is not strictly necessary, but will prevent
 
// programmer mistakes during development: we get a conflict because we're
 
// currently parsing a particular module.
 
pub(crate) fn construct_symbol_conflict_error(
 
    modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol
 
) -> ParseError {
 
    let module = &modules[module_idx];
 
    let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) {
 
        match symbol.introduced_at {
 
            Some(import_id) => {
 
                // Symbol is being imported
 
                let import = &ctx.heap[import_id];
 
                match import {
 
                    Import::Module(import) => (
 
                        format!("the module aliased as '{}' imported here", symbol.name.as_str()),
 
                        import.span
 
                    ),
 
                    Import::Symbols(symbols) => (
 
                        format!("the type '{}' imported here", symbol.name.as_str()),
 
                        symbols.span
 
                    ),
 
                }
 
            },
 
            None => {
 
                // Symbol is being defined
 
                debug_assert_eq!(symbol.defined_in_module, module.root_id);
 
                debug_assert_ne!(symbol.definition.symbol_class(), SymbolClass::Module);
 
                (
 
                    format!("the type '{}' defined here", symbol.name.as_str()),
 
                    symbol.identifier_span
 
                )
 
            }
 
        }
 
    };
 

	
 
    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
 
    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
 
    return ParseError::new_error_at_span(
 
        &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
 
    ).with_info_at_span(
 
        &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
 
    )
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)