From 4839cd3569c5f1ddcabf754e1ec9f53f7955b20f 2021-04-23 18:47:10 From: MH Date: 2021-04-23 18:47:10 Subject: [PATCH] WIP on compiler rearchitecting --- diff --git a/src/collections/mod.rs b/src/collections/mod.rs index f5817538fd52a1d90cb624295b79e5866bb87993..c2d4b9aa09f48075dcd96913363bab8cc9be3fda 100644 --- a/src/collections/mod.rs +++ b/src/collections/mod.rs @@ -2,4 +2,4 @@ mod string_pool; mod scoped_buffer; pub(crate) use string_pool::{StringPool, StringRef}; -pub(crate) use scoped_buffer::ScopedBuffer; \ No newline at end of file +pub(crate) use scoped_buffer::{ScopedBuffer, ScopedSection}; \ No newline at end of file diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 4a1b4a0f12cb13cb1c2d1b01afebeea4e342142c..9555ae70cd547e1518596c76def64ad49b8a6c3f 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -137,7 +137,6 @@ define_new_ast_id!(BlockStatementId, StatementId, index(BlockStatement, Statemen define_new_ast_id!(LocalStatementId, StatementId, index(LocalStatement, Statement::Local, statements), alloc(alloc_local_statement)); define_new_ast_id!(MemoryStatementId, LocalStatementId); define_new_ast_id!(ChannelStatementId, LocalStatementId); -define_new_ast_id!(SkipStatementId, StatementId, index(SkipStatement, Statement::Skip, statements), alloc(alloc_skip_statement)); define_new_ast_id!(LabeledStatementId, StatementId, index(LabeledStatement, Statement::Labeled, statements), alloc(alloc_labeled_statement)); define_new_ast_id!(IfStatementId, StatementId, index(IfStatement, Statement::If, statements), alloc(alloc_if_statement)); define_new_ast_id!(EndIfStatementId, StatementId, index(EndIfStatement, Statement::EndIf, statements), alloc(alloc_end_if_statement)); @@ -162,7 +161,6 @@ define_new_ast_id!(UnaryExpressionId, ExpressionId, index(UnaryExpression, Expre define_new_ast_id!(IndexingExpressionId, ExpressionId, index(IndexingExpression, Expression::Indexing, expressions), alloc(alloc_indexing_expression)); 
define_new_ast_id!(SlicingExpressionId, ExpressionId, index(SlicingExpression, Expression::Slicing, expressions), alloc(alloc_slicing_expression)); define_new_ast_id!(SelectExpressionId, ExpressionId, index(SelectExpression, Expression::Select, expressions), alloc(alloc_select_expression)); -define_new_ast_id!(ArrayExpressionId, ExpressionId, index(ArrayExpression, Expression::Array, expressions), alloc(alloc_array_expression)); define_new_ast_id!(LiteralExpressionId, ExpressionId, index(LiteralExpression, Expression::Literal, expressions), alloc(alloc_literal_expression)); define_new_ast_id!(CallExpressionId, ExpressionId, index(CallExpression, Expression::Call, expressions), alloc(alloc_call_expression)); define_new_ast_id!(VariableExpressionId, ExpressionId, index(VariableExpression, Expression::Variable, expressions), alloc(alloc_variable_expression)); @@ -842,28 +840,6 @@ impl Scope { } } -pub trait VariableScope { - fn parent_scope(&self, h: &Heap) -> Option; - fn get_variable(&self, h: &Heap, id: &Identifier) -> Option; -} - -impl VariableScope for Scope { - fn parent_scope(&self, h: &Heap) -> Option { - match self { - Scope::Definition(def) => h[*def].parent_scope(h), - Scope::Regular(stmt) => h[*stmt].parent_scope(h), - Scope::Synchronous((stmt, _)) => h[*stmt].parent_scope(h), - } - } - fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { - match self { - Scope::Definition(def) => h[*def].get_variable(h, id), - Scope::Regular(stmt) => h[*stmt].get_variable(h, id), - Scope::Synchronous((stmt, _)) => h[*stmt].get_variable(h, id), - } - } -} - #[derive(Debug, Clone)] pub enum Variable { Parameter(Parameter), @@ -918,9 +894,8 @@ pub struct Parameter { pub struct Local { pub this: LocalId, // Phase 1: parser - pub position: InputPosition, - pub parser_type: ParserTypeId, pub identifier: Identifier, + pub parser_type: ParserType, // Phase 2: linker pub relative_pos_in_block: u32, } @@ -1062,21 +1037,6 @@ impl Definition { } } -impl VariableScope for 
Definition { - fn parent_scope(&self, _h: &Heap) -> Option { - None - } - fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { - for ¶meter_id in self.parameters().iter() { - let parameter = &h[parameter_id]; - if parameter.identifier == *id { - return Some(parameter_id.0); - } - } - None - } -} - #[derive(Debug, Clone)] pub struct StructFieldDefinition { pub span: InputSpan, @@ -1189,10 +1149,13 @@ impl ComponentDefinition { } } +// Note that we will have function definitions for builtin functions as well. In +// that case the span, the identifier span and the body are all invalid. #[derive(Debug, Clone)] pub struct FunctionDefinition { pub this: FunctionDefinitionId, // Phase 1: symbol scanning + pub builtin: bool, pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, @@ -1205,8 +1168,10 @@ pub struct FunctionDefinition { impl FunctionDefinition { pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { Self { - this, span, identifier, poly_vars, - return_type: ParserTypeId::new_invalid(), + this, + builtin: false, + span, identifier, poly_vars, + return_types: Vec::new(), parameters: Vec::new(), body: StatementId::new_invalid(), } @@ -1217,7 +1182,6 @@ impl FunctionDefinition { pub enum Statement { Block(BlockStatement), Local(LocalStatement), - Skip(SkipStatement), Labeled(LabeledStatement), If(IfStatement), EndIf(EndIfStatement), @@ -1228,13 +1192,18 @@ pub enum Statement { Synchronous(SynchronousStatement), EndSynchronous(EndSynchronousStatement), Return(ReturnStatement), - Assert(AssertStatement), Goto(GotoStatement), New(NewStatement), Expression(ExpressionStatement), } impl Statement { + pub fn is_block(&self) -> bool { + match self { + Statement::Block(_) => true, + _ => false, + } + } pub fn as_block(&self) -> &BlockStatement { match self { Statement::Block(result) => result, @@ -1367,12 +1336,6 @@ impl Statement { _ => panic!("Unable to cast `Statement` to 
`ReturnStatement`"), } } - pub fn as_assert(&self) -> &AssertStatement { - match self { - Statement::Assert(result) => result, - _ => panic!("Unable to cast `Statement` to `AssertStatement`"), - } - } pub fn as_goto(&self) -> &GotoStatement { match self { Statement::Goto(result) => result, @@ -1397,6 +1360,23 @@ impl Statement { _ => panic!("Unable to cast `Statement` to `ExpressionStatement`"), } } + pub fn span(&self) -> InputSpan { + match self { + Statement::Block(v) => v.span, + Statement::Local(v) => v.span(), + Statement::Labeled(v) => v.label.span, + Statement::If(v) => v.span, + Statement::While(v) => v.span, + Statement::Break(v) => v.span, + Statement::Continue(v) => v.span, + Statement::Synchronous(v) => v.span, + Statement::Return(v) => v.span, + Statement::Goto(v) => v.span, + Statement::New(v) => v.span, + Statement::Expression(v) => v.span, + Statement::EndIf(_) | Statement::EndWhile(_) | Statement::EndSynchronous(_) => unreachable!(), + } + } pub fn link_next(&mut self, next: StatementId) { match self { Statement::Block(_) => todo!(), @@ -1404,11 +1384,9 @@ impl Statement { LocalStatement::Channel(stmt) => stmt.next = Some(next), LocalStatement::Memory(stmt) => stmt.next = Some(next), }, - Statement::Skip(stmt) => stmt.next = Some(next), Statement::EndIf(stmt) => stmt.next = Some(next), Statement::EndWhile(stmt) => stmt.next = Some(next), Statement::EndSynchronous(stmt) => stmt.next = Some(next), - Statement::Assert(stmt) => stmt.next = Some(next), Statement::New(stmt) => stmt.next = Some(next), Statement::Expression(stmt) => stmt.next = Some(next), Statement::Return(_) @@ -1427,7 +1405,8 @@ impl Statement { pub struct BlockStatement { pub this: BlockStatementId, // Phase 1: parser - pub span: InputSpan, + pub is_implicit: bool, + pub span: InputSpan, // of the complete block pub statements: Vec, // Phase 2: linker pub parent_scope: Option, @@ -1465,21 +1444,6 @@ impl BlockStatement { } } -impl VariableScope for BlockStatement { - fn 
parent_scope(&self, _h: &Heap) -> Option { - self.parent_scope.clone() - } - fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { - for local_id in self.locals.iter() { - let local = &h[*local_id]; - if local.identifier == *id { - return Some(local_id.0); - } - } - None - } -} - #[derive(Debug, Clone)] pub enum LocalStatement { Memory(MemoryStatement), @@ -1505,6 +1469,12 @@ impl LocalStatement { _ => panic!("Unable to cast `LocalStatement` to `ChannelStatement`"), } } + pub fn span(&self) -> InputSpan { + match self { + LocalStatement::Channel(v) => v.span, + LocalStatement::Memory(v) => v.span, + } + } pub fn next(&self) -> Option { match self { LocalStatement::Memory(stmt) => stmt.next, @@ -1517,7 +1487,7 @@ impl LocalStatement { pub struct MemoryStatement { pub this: MemoryStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub variable: LocalId, // Phase 2: linker pub next: Option, @@ -1532,7 +1502,7 @@ pub struct MemoryStatement { pub struct ChannelStatement { pub this: ChannelStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "channel" keyword pub from: LocalId, // output pub to: LocalId, // input // Phase 2: linker @@ -1540,20 +1510,10 @@ pub struct ChannelStatement { pub next: Option, } -#[derive(Debug, Clone)] -pub struct SkipStatement { - pub this: SkipStatementId, - // Phase 1: parser - pub position: InputPosition, - // Phase 2: linker - pub next: Option, -} - #[derive(Debug, Clone)] pub struct LabeledStatement { pub this: LabeledStatementId, // Phase 1: parser - pub position: InputPosition, pub label: Identifier, pub body: StatementId, // Phase 2: linker @@ -1565,10 +1525,10 @@ pub struct LabeledStatement { pub struct IfStatement { pub this: IfStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "if" keyword pub test: ExpressionId, - pub true_body: StatementId, - pub false_body: StatementId, + pub true_body: 
BlockStatementId, + pub false_body: Option, // Phase 2: linker pub end_if: Option, } @@ -1586,9 +1546,9 @@ pub struct EndIfStatement { pub struct WhileStatement { pub this: WhileStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "while" keyword pub test: ExpressionId, - pub body: StatementId, + pub body: BlockStatementId, // Phase 2: linker pub end_while: Option, pub in_sync: Option, @@ -1607,7 +1567,7 @@ pub struct EndWhileStatement { pub struct BreakStatement { pub this: BreakStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "break" keyword pub label: Option, // Phase 2: linker pub target: Option, @@ -1617,7 +1577,7 @@ pub struct BreakStatement { pub struct ContinueStatement { pub this: ContinueStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "continue" keyword pub label: Option, // Phase 2: linker pub target: Option, @@ -1627,30 +1587,13 @@ pub struct ContinueStatement { pub struct SynchronousStatement { pub this: SynchronousStatementId, // Phase 1: parser - pub position: InputPosition, - // pub parameters: Vec, - pub body: StatementId, + pub span: InputSpan, // of the "sync" keyword + pub body: BlockStatementId, // Phase 2: linker pub end_sync: Option, pub parent_scope: Option, } -impl VariableScope for SynchronousStatement { - fn parent_scope(&self, _h: &Heap) -> Option { - self.parent_scope.clone() - } - fn get_variable(&self, _h: &Heap, _id: &Identifier) -> Option { - // TODO: Another case of "where was this used for?" 
- // for parameter_id in self.parameters.iter() { - // let parameter = &h[*parameter_id]; - // if parameter.identifier.value == id.value { - // return Some(parameter_id.0); - // } - // } - None - } -} - #[derive(Debug, Clone)] pub struct EndSynchronousStatement { pub this: EndSynchronousStatementId, @@ -1664,25 +1607,15 @@ pub struct EndSynchronousStatement { pub struct ReturnStatement { pub this: ReturnStatementId, // Phase 1: parser - pub position: InputPosition, - pub expression: ExpressionId, -} - -#[derive(Debug, Clone)] -pub struct AssertStatement { - pub this: AssertStatementId, - // Phase 1: parser - pub position: InputPosition, - pub expression: ExpressionId, - // Phase 2: linker - pub next: Option, + pub span: InputSpan, // of the "return" keyword + pub expressions: Vec, } #[derive(Debug, Clone)] pub struct GotoStatement { pub this: GotoStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "goto" keyword pub label: Identifier, // Phase 2: linker pub target: Option, @@ -1692,7 +1625,7 @@ pub struct GotoStatement { pub struct NewStatement { pub this: NewStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the "new" keyword pub expression: CallExpressionId, // Phase 2: linker pub next: Option, @@ -1702,7 +1635,7 @@ pub struct NewStatement { pub struct ExpressionStatement { pub this: ExpressionStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub expression: ExpressionId, // Phase 2: linker pub next: Option, @@ -1926,7 +1859,7 @@ pub struct AssignmentExpression { pub struct BindingExpression { pub this: BindingExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub left: LiteralExpressionId, pub right: ExpressionId, // Phase 2: linker @@ -2051,30 +1984,15 @@ pub struct SelectExpression { pub concrete_type: ConcreteType, } -#[derive(Debug, Clone)] -pub struct ArrayExpression { - pub this: ArrayExpressionId, - 
// Phase 1: parser - pub span: InputSpan, // from the opening to closing delimiter - pub elements: Vec, - // Phase 2: linker - pub parent: ExpressionParent, - // Phase 3: type checking - pub concrete_type: ConcreteType, -} - -// TODO: @tokenizer Symbolic function calls are ambiguous with union literals -// that accept embedded values (although the polymorphic arguments are placed -// differently). To prevent double work we parse as CallExpression, and during -// validation we may transform the expression into a union literal. #[derive(Debug, Clone)] pub struct CallExpression { pub this: CallExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, + pub parser_type: ParserType, // of the function call pub method: Method, pub arguments: Vec, - pub poly_args: Vec, // if symbolic will be determined during validation phase + pub definition: DefinitionId, // Phase 2: linker pub parent: ExpressionParent, // Phase 3: type checking @@ -2083,17 +2001,21 @@ pub struct CallExpression { #[derive(Debug, Clone)] pub enum Method { + // Builtin Get, Put, Fires, Create, - Symbolic(MethodSymbolic) + Length, + Assert, + UserFunction, + UserComponent, } #[derive(Debug, Clone)] pub struct MethodSymbolic { - pub(crate) identifier: NamespacedIdentifier, - pub(crate) definition: Option + pub(crate) parser_type: ParserType, + pub(crate) definition: DefinitionId } #[derive(Debug, Clone)] @@ -2119,6 +2041,7 @@ pub enum Literal { Struct(LiteralStruct), Enum(LiteralEnum), Union(LiteralUnion), + Array(Vec), } impl Literal { @@ -2173,31 +2096,29 @@ pub struct LiteralStructField { #[derive(Debug, Clone)] pub struct LiteralStruct { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, + pub(crate) parser_type: ParserType, pub(crate) fields: Vec, - // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option + pub(crate) definition: DefinitionId, } #[derive(Debug, Clone)] pub struct 
LiteralEnum { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, + pub(crate) parser_type: ParserType, + pub(crate) variant: Identifier, + pub(crate) definition: DefinitionId, // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option, pub(crate) variant_idx: usize, // as present in the type table } #[derive(Debug, Clone)] pub struct LiteralUnion { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, + pub(crate) parser_type: ParserType, + pub(crate) variant: Identifier, pub(crate) values: Vec, + pub(crate) definition: DefinitionId, // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option, pub(crate) variant_idx: usize, // as present in type table } @@ -2205,8 +2126,7 @@ pub struct LiteralUnion { pub struct VariableExpression { pub this: VariableExpressionId, // Phase 1: parser - pub position: InputPosition, - pub identifier: NamespacedIdentifier, + pub identifier: Identifier, // Phase 2: linker pub declaration: Option, pub parent: ExpressionParent, diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index e315a1e9439549529d8a20d41d72ec15621517e8..93e2f6d84bae1012ea81696bad3018a5b21c3dbf 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -8,6 +8,9 @@ use crate::collections::*; /// Parses all the tokenized definitions into actual AST nodes. pub(crate) struct PassDefinitions { + // State + cur_definition: DefinitionId, + // Temporary buffers of various kinds buffer: String, identifiers: Vec, struct_fields: Vec, @@ -82,9 +85,6 @@ impl PassDefinitions { Ok(()) } - // TODO: @Cleanup, still not sure about polymorphic variable parsing. Pre-parsing the variables - // allows us to directly construct proper ParserType trees. But this does require two lookups - // of the corresponding definition. 
fn visit_struct_definition( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result<(), ParseError> { @@ -103,13 +103,18 @@ impl PassDefinitions { consume_comma_separated( TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, |source, iter| { + let start_pos = iter.last_valid_pos(); let parser_type = consume_parser_type( - source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false + source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, + definition_id, false, 0 )?; let field = consume_ident_interned(source, iter, ctx)?; - Ok(StructFieldDefinition{ field, parser_type }) + Ok(StructFieldDefinition{ + span: InputSpan::from_positions(start_pos, field.span.end), + field, parser_type + }) }, - &mut self.struct_fields, "a struct field", "a list of struct fields" + &mut self.struct_fields, "a struct field", "a list of struct fields", None )?; // Transfer to preallocated definition @@ -148,7 +153,7 @@ impl PassDefinitions { }; Ok(EnumVariantDefinition{ identifier, value }) }, - &mut self.enum_variants, "an enum variant", "a list of enum variants" + &mut self.enum_variants, "an enum variant", "a list of enum variants", None )?; // Transfer to definition @@ -178,13 +183,13 @@ impl PassDefinitions { TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, |source, iter| { let identifier = consume_ident_interned(source, iter, ctx)?; - let close_pos = identifier.span.end; + let mut close_pos = identifier.span.end; let has_embedded = maybe_consume_comma_separated( TokenKind::OpenParen, TokenKind::CloseParen, source, iter, |source, iter| { consume_parser_type( source, iter, &ctx.symbols, &ctx.heap, poly_vars, - module_scope, definition_id, false + module_scope, definition_id, false, 0 ) }, &mut self.parser_types, "an embedded type", Some(&mut close_pos) @@ -233,12 +238,13 @@ impl PassDefinitions { self.parameters.clear(); // Consume return types - consume_comma_separated( - TokenKind::ArrowRight, 
TokenKind::OpenCurly, &module.source, iter, + consume_token(&module.source, iter, TokenKind::ArrowRight)?; + consume_comma_separated_until( + TokenKind::OpenCurly, &module.source, iter, |source, iter| { consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false) }, - &mut self.parser_types, "a return type", "the return types", None + &mut self.parser_types, "a return type", None )?; let return_types = self.parser_types.clone(); self.parser_types.clear(); @@ -246,41 +252,46 @@ impl PassDefinitions { // Consume block } + /// Consumes a statement and returns a boolean indicating whether it was a + /// block or not. fn consume_statement( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx - ) -> Result { + ) -> Result<(StatementId, bool), ParseError> { let next = iter.next().expect("consume_statement has a next token"); - if next == TokenKind::OpenCurly { - return self.consume_block_statement(module, iter, ctx)?.upcast(); + let mut was_block = false; + let statement = if next == TokenKind::OpenCurly { + was_block = true; + self.consume_block_statement(module, iter, ctx)?.upcast() } else if next == TokenKind::Ident { let (ident, _) = consume_any_ident(source, iter)?; if ident == KW_STMT_IF { - return self.consume_if_statement(module, iter, ctx)?; + self.consume_if_statement(module, iter, ctx)? } else if ident == KW_STMT_WHILE { - return self.consume_while_statement(module, iter, ctx)?; + self.consume_while_statement(module, iter, ctx)? } else if ident == KW_STMT_BREAK { - return self.consume_break_statement(module, iter, ctx)?; + self.consume_break_statement(module, iter, ctx)? } else if ident == KW_STMT_CONTINUE { - return self.consume_continue_statement(module, iter, ctx)?; + self.consume_continue_statement(module, iter, ctx)? } else if ident == KW_STMT_SYNC { - return self.consume_synchronous_statement(module, iter, ctx)?; + self.consume_synchronous_statement(module, iter, ctx)? 
} else if ident == KW_STMT_RETURN { - return self.consume_return_statement(module, iter, ctx)?; - } else if ident == KW_STMT_ASSERT { - // TODO: Unify all builtin function calls as expressions - return self.consume_assert_statement(module, iter, ctx)?; + self.consume_return_statement(module, iter, ctx)? } else if ident == KW_STMT_GOTO { - return self.consume_goto_statement(module, iter, ctx)?; + self.consume_goto_statement(module, iter, ctx)? } else if ident == KW_STMT_NEW { - return self.consume_new_statement(module, iter, ctx)?; + self.consume_new_statement(module, iter, ctx)? + } else if ident == KW_STMT_CHANNEL { + self.consume_channel_statement(module, iter, ctx)? } else if iter.peek() == Some(TokenKind::Colon) { - return self.consume_labeled_statement(module, iter, ctx)?; + self.consume_labeled_statement(module, iter, ctx)? + } else { + // Attempt to parse as expression + self.consume_expression_statement(module, iter, ctx)? } - } + }; - // If here then attempt to parse as expression - return self.consume_expr_statement(module, iter, ctx)?; + return Ok((statement, was_block)); } fn consume_block_statement( @@ -304,6 +315,7 @@ impl PassDefinitions { Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{ this, + is_implicit: false, span: block_span, statements, parent_scope: None, @@ -316,8 +328,258 @@ impl PassDefinitions { fn consume_if_statement( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result { - consume_exact_ident(&module.source, iter, KW_STMT_IF)?; - let test = consume_parenthesized_expression() + let if_span = consume_exact_ident(&module.source, iter, KW_STMT_IF)?; + consume_token(&module.source, iter, TokenKind::OpenParen)?; + let test = self.consume_expression(module, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::CloseParen)?; + let (true_body, was_block) = self.consume_statement(module, iter, ctx)?; + let true_body = Self::wrap_in_block(ctx, true_body, was_block); + + let false_body = if 
has_ident(source, iter, KW_STMT_ELSE) { + iter.consume(); + let (false_body, was_block) = self.consume_statement(module, iter, ctx)?; + Some(Self::wrap_in_block(ctx, false_body, was_block)) + } else { + None + }; + + Ok(ctx.heap.alloc_if_statement(|this| IfStatement{ + this, + span: if_span, + test, + true_body, + false_body, + end_if: None, + })) + } + + fn consume_while_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let while_span = consume_exact_ident(&module.source, iter, KW_STMT_WHILE)?; + consume_token(&module.source, iter, TokenKind::OpenParen)?; + let test = self.consume_expression(module, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::CloseParen)?; + let (body, was_block) = self.consume_statement(module, iter, ctx)?; + let body = Self::wrap_in_block(ctx, body, was_block); + + Ok(ctx.heap.alloc_while_statement(|this| WhileStatement{ + this, + span: while_span, + test, + body, + end_while: None, + in_sync: None, + })) + } + + fn consume_break_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let break_span = consume_exact_ident(&module.source, iter, KW_STMT_BREAK)?; + let label = if Some(TokenKind::Ident) == iter.next() { + let label = consume_ident_interned(&module.source, iter, ctx)?; + Some(label) + } else { + None + }; + consume_token(&module.source, iter, TokenKind::SemiColon)?; + Ok(ctx.heap.alloc_break_statement(|this| BreakStatement{ + this, + span: break_span, + label, + target: None, + })) + } + + fn consume_continue_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let continue_span = consume_exact_ident(&module.source, iter, KW_STMT_CONTINUE)?; + let label= if Some(TokenKind::Ident) == iter.next() { + let label = consume_ident_interned(&module.source, iter, ctx)?; + Some(label) + } else { + None + }; + consume_token(&module.source, iter, TokenKind::SemiColon)?; + 
Ok(ctx.heap.alloc_continue_statement(|this| ContinueStatement{ + this, + span: continue_span, + label, + target: None + })) + } + + fn consume_synchronous_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let synchronous_span = consume_exact_ident(&module.source, iter, KW_STMT_SYNC)?; + let (body, was_block) = self.consume_statement(module, iter, ctx)?; + let body = Self::wrap_in_block(ctx, body, was_block); + Ok(ctx.heap.alloc_synchronous_statement(|this| SynchronousStatement{ + this, + span: synchronous_span, + body, + end_sync: None, + parent_scope: None, + })) + } + + fn consume_return_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let return_span = consume_exact_ident(&module.source, iter, KW_STMT_RETURN)?; + let mut scoped_section = self.expressions.start_section(); + + consume_comma_separated_until( + TokenKind::SemiColon, &module.source, iter, + |source, iter| self.consume_expression(module, iter, ctx), + &mut scoped_section, "a return expression", None + )?; + let expressions = scoped_section.into_vec(); + + if expressions.is_empty() { + return Err(ParseError::new_error_str_at_span(&module.source, return_span, "expected at least one return value")); + } + + Ok(ctx.heap.alloc_return_statement(|this| ReturnStatement{ + this, + span: return_span, + expressions + })) + } + + fn consume_goto_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let goto_span = consume_exact_ident(&module.source, iter, KW_STMT_GOTO)?; + let label = consume_ident_interned(&module.source, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::SemiColon)?; + Ok(ctx.heap.alloc_goto_statement(|this| GotoStatement{ + this, + span: goto_span, + label, + target: None + })) + } + + fn consume_new_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let new_span = 
consume_exact_ident(&module.source, iter, KW_STMT_NEW)?; + + // TODO: @Cleanup, should just call something like consume_component_expression-ish + let start_pos = iter.last_valid_pos(); + let expression_id = self.consume_primary_expression(module, iter, ctx)?; + let expression = &ctx.heap[expression_id]; + let mut valid = false; + + let mut call_id = CallExpressionId.new_invalid(); + if let Expression::Call(expression) = expression { + if expression.method == Method::UserComponent { + call_id = expression.this; + valid = true; + } + } + + if !valid { + return Err(ParseError::new_error_str_at_span( + source, InputSpan::from_positions(start_pos, iter.last_valid_pos()), + "expected a call to a component" + )); + } + consume_token(&module.source, iter, TokenKind::SemiColon)?; + + debug_assert!(!call_id.is_invalid()); + Ok(ctx.heap.alloc_new_statement(|this| NewStatement{ + this, + span: new_span, + expression: call_id, + next: None + })) + } + + fn consume_channel_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + // Consume channel specification + let channel_span = consume_exact_ident(&module.source, iter, KW_STMT_CHANNEL)?; + let channel_type = if Some(TokenKind::OpenAngle) = iter.next() { + // Retrieve the type of the channel, we're cheating a bit here by + // consuming the first '<' and setting the initial angle depth to 1 + // such that our final '>' will be consumed as well. + iter.consume(); + consume_parser_type( + &module.source, iter, &ctx.symbols, &ctx.heap, + poly_vars, SymbolScope::Module(module.root_id), definition_id, + true, 1 + )? 
+ } else { + // Assume inferred + ParserType{ elements: vec![ParserTypeElement{ + full_span: channel_span, // TODO: @Span fix + variant: ParserTypeVariant::Inferred + }]} + }; + + let from_identifier = consume_ident_interned(&module.source, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::ArrowRight)?; + let to_identifier = consume_ident_interned(&module.source, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::SemiColon)?; + + // Construct ports + let from = ctx.heap.alloc_local(|this| Local{ + this, + identifier: from_identifier, + parser_type: channel_type.clone(), + relative_pos_in_block: 0, + }); + let to = ctx.heap.alloc_local(|this| Local{ + this, + identifier: to_identifier, + parser_type: channel_type, + relative_pos_in_block: 0, + }); + + // Construct the channel + Ok(ctx.heap.alloc_channel_statement(|this| ChannelStatement{ + this, + span: channel_span, + from, to, + relative_pos_in_block: 0, + next: None, + })) + } + + fn consume_labeled_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let label = consume_ident_interned(&module.source, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::Colon)?; + let (body, _) = self.consume_statement(module, iter, ctx)?; + + Ok(ctx.heap.alloc_labeled_statement(|this| LabeledStatement{ + this, label, body, + relative_pos_in_block: 0, + in_sync: None + })) + } + + fn consume_expression_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let start_pos = iter.last_valid_pos(); + let expression = self.consume_expression(module, iter, ctx)?; + let end_pos = iter.last_valid_pos(); + consume_token(&module.source, iter, TokenKind::SemiColon)?; + + Ok(ctx.heap.alloc_expression_statement(|this| ExpressionStatement{ + this, + span: InputSpan::from_positions(start_pos, end_pos), + expression, + next: None, + })) } //-------------------------------------------------------------------------- @@ -342,7 
+604,7 @@ impl PassDefinitions { return None } - let matched = match token.unwrap() { + match token.unwrap() { TK::Equal => Some(AO::Set), TK::StarEquals => Some(AO::Multiplied), TK::SlashEquals => Some(AO::Divided), @@ -355,7 +617,7 @@ impl PassDefinitions { TK::CaretEquals => Some(AO::BitwiseXored), TK::OrEquals => Some(AO::BitwiseOred), _ => None - }; + } } let expr = self.consume_conditional_expression(module, iter, ctx)?; @@ -680,60 +942,245 @@ impl PassDefinitions { ) -> Result { let next = iter.next(); - let result; - if next == Some(TokenKind::OpenParen) { + let result = if next == Some(TokenKind::OpenParen) { // Expression between parentheses iter.consume(); - result = self.consume_expression(module, iter, ctx)?; + let result = self.consume_expression(module, iter, ctx)?; consume_token(&module.source, iter, TokenKind::CloseParen)?; + + result } else if next == Some(TokenKind::OpenCurly) { // Array literal let (start_pos, mut end_pos) = iter.next_positions(); - let mut expressions = Vec::new(); + let mut scoped_section = self.expressions.start_section(); consume_comma_separated( TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, |source, iter| self.consume_expression(module, iter, ctx), - &mut expressions, "an expression", "a list of expressions", Some(&mut end_pos) + &mut scoped_section, "an expression", "a list of expressions", Some(&mut end_pos) )?; - // TODO: Turn into literal - result = ctx.heap.alloc_array_expression(|this| ArrayExpression{ + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ this, span: InputSpan::from_positions(start_pos, end_pos), - elements: expressions, + value: Literal::Array(scoped_section.into_vec()), parent: ExpressionParent::None, concrete_type: ConcreteType::default(), - }).upcast(); + }).upcast() } else if next == Some(TokenKind::Integer) { let (literal, span) = consume_integer_literal(&module.source, iter, &mut self.buffer)?; - result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + 
+ ctx.heap.alloc_literal_expression(|this| LiteralExpression{ this, span, value: Literal::Integer(LiteralInteger{ unsigned_value: literal, negated: false }), parent: ExpressionParent::None, concrete_type: ConcreteType::default(), - }).upcast(); + }).upcast() } else if next == Some(TokenKind::String) { let span = consume_string_literal(&module.source, iter, &mut self.buffer)?; let interned = ctx.pool.intern(self.buffer.as_bytes()); - result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ this, span, value: Literal::String(interned), parent: ExpressionParent::None, concrete_type: ConcreteType::default(), - }).upcast(); + }).upcast() } else if next == Some(TokenKind::Character) { let (character, span) = consume_character_literal(&module.source, iter)?; - result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ this, span, value: Literal::Character(character), parent: ExpressionParent::None, concrete_type: ConcreteType::default(), - }).upcast(); + }).upcast() } else if next == Some(TokenKind::Ident) { // May be a variable, a type instantiation or a function call. If we // have a single identifier that we cannot find in the type table - // then we're going to assume that we're dealign with a variable. - } + // then we're going to assume that we're dealing with a variable. 
+ let ident_span = iter.next_span(); + let ident_text = module.source.section_at_span(ident_span); + let symbol = ctx.symbols.get_symbol_by_name(SymbolScope::Module(module.root_id), ident_text); + + if symbol.is_some() { + // The first bit looked like a symbol, so we're going to follow + // that all the way through, assume we arrive at some kind of + // function call or type instantiation + use ParserTypeVariant as PTV; + + let symbol_scope = SymbolScope::Definition(self.cur_definition); + let poly_vars = ctx.heap[self.cur_definition].poly_vars(); + let parser_type = consume_parser_type( + &module.source, iter, &ctx.symbols, &ctx.heap, poly_vars, symbol_scope, + self.cur_definition, true, 0 + )?; + debug_assert!(!parser_type.elements.is_empty()); + match parser_type.elements[0].variant { + PTV::Definition(target_definition_id, _) => { + let definition = &ctx.heap[target_definition_id]; + match definition { + Definition::Struct(_) => { + // Struct literal + let mut last_token = iter.last_valid_pos(); + let mut struct_fields = Vec::new(); + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, + |source, iter| { + let identifier = consume_ident_interned(source, iter, ctx)?; + consume_token(source, iter, TokenKind::Colon)?; + let value = self.consume_expression(module, iter, ctx)?; + Ok(LiteralStructField{ identifier, value, field_idx: 0 }) + }, + &mut struct_fields, "a struct field", "a list of struct fields", Some(&mut last_token) + )?; + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + this, + span: InputSpan::from_positions(ident_span.begin, last_token), + value: Literal::Struct(LiteralStruct{ + parser_type, + fields: struct_fields, + definition: target_definition_id, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast() + }, + Definition::Enum(_) => { + // Enum literal: consume the variant + consume_token(&module.source, iter, TokenKind::ColonColon)?; + let variant =
consume_ident_interned(&module.source, iter, ctx)?; + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + this, + span: InputSpan::from_positions(ident_span.begin, variant.span.end), + value: Literal::Enum(LiteralEnum{ + parser_type, + variant, + definition: target_definition_id, + variant_idx: 0 + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }).upcast() + }, + Definition::Union(_) => { + // Union literal: consume the variant + consume_token(&module.source, iter, TokenKind::ColonColon)?; + let variant = consume_ident_interned(&module.source, iter, ctx)?; + + // Consume any possible embedded values + let mut end_pos = iter.last_valid_pos(); + let values = self.consume_expression_list(module, iter, ctx, Some(&mut end_pos))?; + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + this, + span: InputSpan::from_positions(ident_span.begin, end_pos), + value: Literal::Union(LiteralUnion{ + parser_type, variant, values, + definition: target_definition_id, + variant_idx: 0, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }).upcast() + }, + Definition::Component(_) => { + // Component instantiation + let arguments = self.consume_expression_list(module, iter, ctx, None)?; + + ctx.heap.alloc_call_expression(|this| CallExpression{ + this, + span: parser_type.elements[0].full_span, // TODO: @Span fix + parser_type, + method: Method::UserComponent, + arguments, + definition: target_definition_id, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast() + }, + Definition::Function(function_definition) => { + // Function call: consume the arguments + let arguments = self.consume_expression_list(module, iter, ctx, None)?; + + // Check whether it is a builtin function + let method = if function_definition.builtin { + match function_definition.identifier.value.as_str() { + "get" => Method::Get, + "put" => Method::Put, + "fires" => Method::Fires, + "create" => 
Method::Create, + "length" => Method::Length, + "assert" => Method::Assert, + _ => unreachable!(), + } + } else { + Method::UserFunction + }; + + ctx.heap.alloc_call_expression(|this| CallExpression{ + this, + span: parser_type.elements[0].full_span, // TODO: @Span fix + parser_type, + method, + arguments, + definition: target_definition_id, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast() + } + } + }, + _ => { + // TODO: Casting expressions + return Err(ParseError::new_error_str_at_span( + &module.source, parser_type.elements[0].full_span, + "unexpected type in expression, note that casting expressions are not yet implemented" + )) + } + } + } else { + // Check for builtin keywords or builtin functions + if ident_text == KW_LIT_NULL || ident_text == KW_LIT_TRUE || ident_text == KW_LIT_FALSE { + // Parse builtin literal + let value = match ident_text { + KW_LIT_NULL => Literal::Null, + KW_LIT_TRUE => Literal::True, + KW_LIT_FALSE => Literal::False, + _ => unreachable!(), + }; + + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + this, + span: ident_span, + value, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast() + } else { + // I'm a bit unsure about this. One may as well have wrongfully + // typed `TypeWithTypo::`, then we assume that + // `TypeWithTypo` is a variable. So might want to come back to + // this later to do some silly heuristics. 
+ iter.consume(); + if Some(TokenKind::ColonColon) == iter.next() { + return Err(ParseError::new_error_str_at_span(&module.source, ident_span, "unknown identifier")); + } + + let ident_text = ctx.pool.intern(ident_text); + let identifier = Identifier { span: ident_span, value: ident_text }; + + ctx.heap.alloc_variable_expression(|this| VariableExpression { + this, + identifier, + declaration: None, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }).upcast() + } + } + }; Ok(result) } @@ -766,6 +1213,37 @@ impl PassDefinitions { Ok(result) } + + #[inline] + fn consume_expression_list( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, end_pos: Option<&mut InputPosition> + ) -> Result<Vec<ExpressionId>, ParseError> { + let mut section = self.expressions.start_section(); + consume_comma_separated( + TokenKind::OpenParen, TokenKind::CloseParen, &module.source, iter, + |source, iter| self.consume_expression(module, iter, ctx), + &mut section, "an expression", "a list of expressions", end_pos + )?; + Ok(section.into_vec()) + } + + fn wrap_in_block(ctx: &mut PassCtx, statement: StatementId, was_block: bool) -> BlockStatementId { + debug_assert_eq!(was_block, ctx.heap[statement].is_block()); + if was_block { + return BlockStatementId(StatementId::new(statement.index)); // Yucky: reuses the statement's index as the block ID + } + let span = ctx.heap[statement].span(); // read before `alloc_block_statement` borrows the heap mutably + ctx.heap.alloc_block_statement(|this| BlockStatement{ + this, + is_implicit: true, + span, + statements: vec![statement], + parent_scope: None, + relative_pos_in_parent: 0, + locals: Vec::new(), + labels: Vec::new(), + }) + } } /// Consumes a type. A type always starts with an identifier which may indicate @@ -773,10 +1251,12 @@ impl PassDefinitions { /// polymorphic arguments makes it a tree-like structure. Because we cannot rely /// on knowing the exact number of polymorphic arguments we do not check for /// these. -// TODO: @Optimize, and fix spans if needed +/// +/// Note that the first depth index is used as a hack.
+// TODO: @Optimize, @Span fix fn consume_parser_type( source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], - cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool + cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, first_angle_depth: i32, ) -> Result { struct Entry{ element: ParserTypeElement, @@ -822,7 +1302,7 @@ fn consume_parser_type( iter.consume(); enum State { Ident, Open, Close, Comma }; let mut state = State::Open; - let mut angle_depth = 1; + let mut angle_depth = first_angle_depth + 1; loop { let next = iter.next(); @@ -1005,6 +1485,9 @@ fn consume_parser_type( Ok(ParserType{ elements: constructed_elements }) } +/// Consumes an identifier for which we assume that it resolves to some kind of +/// type. Once we actually arrive at a type we will stop parsing. Hence there +/// may be trailing '::' tokens in the iterator. fn consume_parser_type_ident( source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], mut scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, @@ -1123,7 +1606,8 @@ fn consume_parameter_list( |source, iter| { let (start_pos, _) = iter.next_positions(); let parser_type = consume_parser_type( - source, iter, &ctx.symbols, &ctx.heap, poly_vars, scope, definition_id, false + source, iter, &ctx.symbols, &ctx.heap, poly_vars, scope, + definition_id, false, 0 )?; let identifier = consume_ident_interned(source, iter, ctx)?; let parameter_id = ctx.heap.alloc_parameter(|this| Parameter{ diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs index 967c44cde8d40bb846caa0977afd513f46cfa239..9242f9aaccb1348fb7cc46722ed977d0e023e690 100644 --- a/src/protocol/parser/symbol_table2.rs +++ b/src/protocol/parser/symbol_table2.rs @@ -18,6 +18,7 @@ const RESERVED_SYMBOLS: usize = 32; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum 
SymbolScope { + Global, Module(RootId), Definition(DefinitionId), } @@ -97,7 +98,8 @@ pub struct SymbolModule { #[derive(Debug, Clone)] pub struct SymbolDefinition { // Definition location (not necessarily the place where the symbol - // is introduced, as it may be imported) + // is introduced, as it may be imported). Builtin symbols will have invalid + // spans and module IDs pub defined_in_module: RootId, pub defined_in_scope: SymbolScope, pub definition_span: InputSpan, // full span of definition @@ -277,7 +279,10 @@ impl SymbolTable { None // in-scope modules are always imported }, SymbolVariant::Definition(variant) => { - if variant.imported_at.is_some() { + if variant.imported_at.is_some() || variant.defined_in_scope == SymbolScope::Global { + // Symbol is imported or lives in the global scope. + // Things in the global scope are defined by the + // compiler. None } else { Some(symbol) diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index 4d975a80bdca9d1b8a75f9755a298ff4c5d8c89d..cf4585680eae8a75aa0611abe7da4902207c7c3f 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -1,4 +1,4 @@ -use crate::collections::StringRef; +use crate::collections::{StringRef, ScopedSection}; use crate::protocol::ast::*; use crate::protocol::input_source2::{ InputSource2 as InputSource, @@ -32,17 +32,18 @@ pub(crate) const KW_FUNC_PUT: &'static [u8] = b"put"; pub(crate) const KW_FUNC_FIRES: &'static [u8] = b"fires"; pub(crate) const KW_FUNC_CREATE: &'static [u8] = b"create"; pub(crate) const KW_FUNC_LENGTH: &'static [u8] = b"length"; +pub(crate) const KW_FUNC_ASSERT: &'static [u8] = b"assert"; // Keywords - statements pub(crate) const KW_STMT_CHANNEL: &'static [u8] = b"channel"; pub(crate) const KW_STMT_IF: &'static [u8] = b"if"; +pub(crate) const KW_STMT_ELSE: &'static [u8] = b"else"; pub(crate) const KW_STMT_WHILE: &'static [u8] = b"while"; pub(crate) const KW_STMT_BREAK: &'static [u8] = 
b"break"; pub(crate) const KW_STMT_CONTINUE: &'static [u8] = b"continue"; pub(crate) const KW_STMT_GOTO: &'static [u8] = b"goto"; pub(crate) const KW_STMT_RETURN: &'static [u8] = b"return"; pub(crate) const KW_STMT_SYNC: &'static [u8] = b"synchronous"; -pub(crate) const KW_STMT_ASSERT: &'static [u8] = b"assert"; pub(crate) const KW_STMT_NEW: &'static [u8] = b"new"; // Keywords - types @@ -62,6 +63,35 @@ pub(crate) const KW_TYPE_CHAR: &'static [u8] = b"char"; pub(crate) const KW_TYPE_STRING: &'static [u8] = b"string"; pub(crate) const KW_TYPE_INFERRED: &'static [u8] = b"auto"; +/// A special trait for when consuming comma-separated things such that we can +/// push them onto a `Vec` and onto a `ScopedSection`. As we monomorph for +/// very specific comma-separated cases I don't expect polymorph bloat. +/// Also, I really don't like this solution. +pub(crate) trait Extendable { + type Value; + + #[inline] + fn push(&mut self, v: Self::Value); +} + +impl<T> Extendable for Vec<T> { + type Value = T; + + #[inline] + fn push(&mut self, v: Self::Value) { + Vec::push(self, v); + } +} + +impl<T> Extendable for ScopedSection<T> { + type Value = T; + + #[inline] + fn push(&mut self, v: Self::Value) { + ScopedSection::push(self, v); + } +} + /// Consumes a domain-name identifier: identifiers separated by a dot. For /// simplification of later parsing and span identification the domain-name may /// contain whitespace, but must reside on the same line. @@ -105,28 +135,15 @@ pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected Ok(span) } -/// Consumes a comma-separated list of items if the opening delimiting token is -/// encountered. If not, then the iterator will remain at its current position. -/// Note that the potential cases may be: -/// - No opening delimiter encountered, then we return `false`. -/// - Both opening and closing delimiter encountered, but no items. -/// - Opening and closing delimiter encountered, and items were processed.
-/// - Found an opening delimiter, but processing an item failed. -pub(crate) fn maybe_consume_comma_separated( - open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, - consumer_fn: F, target: &mut Vec, item_name_and_article: &'static str, +/// Consumes a comma separated list until the closing delimiter is encountered +pub(crate) fn consume_comma_separated_until( + close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, + consumer_fn: F, target: &mut E, item_name_and_article: &'static str, close_pos: Option<&mut InputPosition> -) -> Result - where F: Fn(&InputSource, &mut TokenIter) -> Result +) -> Result<(), ParseError> + where F: Fn(&InputSource, &mut TokenIter) -> Result, + E: Extendable { - let mut next = iter.next(); - if Some(open_delim) != next { - return Ok(false); - } - - // Opening delimiter encountered, so must parse the comma-separated list. - iter.consume(); - target.clear(); let mut had_comma = true; loop { next = iter.next(); @@ -155,6 +172,33 @@ pub(crate) fn maybe_consume_comma_separated( } } + Ok(()) +} + +/// Consumes a comma-separated list of items if the opening delimiting token is +/// encountered. If not, then the iterator will remain at its current position. +/// Note that the potential cases may be: +/// - No opening delimiter encountered, then we return `false`. +/// - Both opening and closing delimiter encountered, but no items. +/// - Opening and closing delimiter encountered, and items were processed. +/// - Found an opening delimiter, but processing an item failed. 
+pub(crate) fn maybe_consume_comma_separated( + open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, + consumer_fn: F, target: &mut E, item_name_and_article: &'static str, + close_pos: Option<&mut InputPosition> +) -> Result + where F: Fn(&InputSource, &mut TokenIter) -> Result, + E: Extendable +{ + let mut next = iter.next(); + if Some(open_delim) != next { + return Ok(false); + } + + // Opening delimiter encountered, so must parse the comma-separated list. + iter.consume(); + consume_comma_separated_until(close_delim, source, iter, consumer_fn, target, item_name_and_article, close_pos)?; + Ok(true) } @@ -194,12 +238,13 @@ pub(crate) fn maybe_consume_comma_separated_spilled( +pub(crate) fn consume_comma_separated( open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, - consumer_fn: F, target: &mut Vec<T>, item_name_and_article: &'static str, + consumer_fn: F, target: &mut E, item_name_and_article: &'static str, list_name_and_article: &'static str, close_pos: Option<&mut InputPosition> ) -> Result<(), ParseError> - where F: Fn(&InputSource, &mut TokenIter) -> Result + where F: Fn(&InputSource, &mut TokenIter) -> Result, + E: Extendable { let first_pos = iter.last_valid_pos(); match maybe_consume_comma_separated( @@ -345,6 +390,8 @@ pub(crate) fn consume_string_literal( } } + debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal + Ok(span) } @@ -446,7 +493,7 @@ fn is_reserved_statement_keyword(text: &[u8]) -> bool { KW_IMPORT | KW_AS | KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE | KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN | - KW_STMT_SYNC | KW_STMT_ASSERT | KW_STMT_NEW => true, + KW_STMT_SYNC | KW_STMT_NEW => true, _ => false, } } @@ -455,7 +502,7 @@ fn is_reserved_expression_keyword(text: &[u8]) -> bool { match text { KW_LET | KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL | - KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_LENGTH => true, + KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES |
KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH => true, _ => false, } } @@ -498,39 +545,54 @@ pub(crate) fn construct_symbol_conflict_error( modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol ) -> ParseError { let module = &modules[module_idx]; - let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, InputSpan) { - match symbol.introduced_at { - Some(import_id) => { - // Symbol is being imported - let import = &ctx.heap[import_id]; - match import { - Import::Module(import) => ( - format!("the module aliased as '{}' imported here", symbol.name.as_str()), - import.span - ), - Import::Symbols(symbols) => ( - format!("the type '{}' imported here", symbol.name.as_str()), - symbols.span - ), - } + let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, Option) { + match &symbol.variant { + SymbolVariant::Module(module) => { + let import = &ctx.heap[module.introduced_at]; + return ( + format!("the module aliased as '{}' imported here", symbol.name.as_str()), + Some(import.as_module().span) + ); }, - None => { - // Symbol is being defined - debug_assert_eq!(symbol.defined_in_module, module.root_id); - debug_assert_ne!(symbol.definition.symbol_class(), SymbolClass::Module); - ( - format!("the type '{}' defined here", symbol.name.as_str()), - symbol.identifier_span - ) + SymbolVariant::Definition(definition) => { + if definition.defined_in_module.is_invalid() { + // Must be a builtin thing + return (format!("the builtin '{}'", symbol.name.as_str()), None) + } else { + if let Some(import_id) = definition.imported_at { + let import = &ctx.heap[import_id]; + return ( + format!("the type '{}' imported here", symbol.name.as_str()), + Some(import.as_symbols().span) + ); + } else { + // This is a defined symbol. So this must mean that the + // error was caused by it being defined. 
+ debug_assert_eq!(definition.defined_in_module, module.root_id); + + return ( + format!("the type '{}' defined here", symbol.name.as_str()), + Some(definition.identifier_span) + ) + } + } } } }; let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol); let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol); - return ParseError::new_error_at_span( - &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg) - ).with_info_at_span( - &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg) - ) + let new_symbol_span = new_symbol_span.unwrap(); // because new symbols cannot be builtin + + match old_symbol_span { + Some(old_symbol_span) => ParseError::new_error_at_span( + &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg) + ).with_info_at_span( + &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg) + ), + None => ParseError::new_error_at_span( + &module.source, new_symbol_span, + format!("symbol is defined twice: {} conflicts with {}", new_symbol_msg, old_symbol_msg) + ) + } } \ No newline at end of file