diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index 8ce576bcfa720aef0e2e4d9701150fb216f14b7a..f1daca6c3081615e0dee6cdde1a16065d2eaf752 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -43,35 +43,33 @@ impl PassDefinitions { pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { let module = &modules[module_idx]; - let module_range = &module.tokens.ranges[0]; debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved); - debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - // Although we only need to parse the definitions, we want to go through - // code ranges as well such that we can throw errors if we get - // unexpected tokens at the module level of the source. - let mut range_idx = module_range.first_child_idx; - loop { - let range_idx_usize = range_idx as usize; - let cur_range = &module.tokens.ranges[range_idx_usize]; - - match cur_range.range_kind { - TokenRangeKind::Module => unreachable!(), // should not be reachable - TokenRangeKind::Pragma | TokenRangeKind::Import => { - // Already fully parsed, fall through and go to next range - }, - TokenRangeKind::Definition | TokenRangeKind::Code => { - // Visit range even if it is a "code" range to provide - // proper error messages. - self.visit_range(modules, module_idx, ctx, range_idx_usize)?; - }, + // We iterate through the entire document. If we find a marker that has + // been handled then we skip over it. It is important that we properly + // parse all other tokens in the document to ensure that we throw the + // correct kind of errors. + let num_tokens = module.tokens.tokens.len() as u32; + let num_markers = module.tokens.markers.len(); + + let mut marker_index = 0; + let mut first_token_index = 0; + while first_token_index < num_tokens { + // Seek ahead to the next marker that was already handled. + let mut last_token_index = num_tokens; + let mut new_first_token_index = num_tokens; + while marker_index < num_markers { + let marker = &module.tokens.markers[marker_index]; + marker_index += 1; + if marker.handled { + last_token_index = marker.first_token; + new_first_token_index = marker.last_token; + break; + } } - if cur_range.next_sibling_idx == NO_SIBLING { - break; - } else { - range_idx = cur_range.next_sibling_idx; - } + self.visit_token_range(modules, module_idx, ctx, first_token_index, last_token_index)?; + first_token_index = new_first_token_index; } modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed; @@ -79,15 +77,14 @@ impl PassDefinitions { Ok(()) } - fn visit_range( - &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize + fn visit_token_range( + &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, + token_range_begin: u32, token_range_end: u32, ) -> Result<(), ParseError> { let module = &modules[module_idx]; - let cur_range = &module.tokens.ranges[range_idx]; - debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code); // Detect which definition we're parsing - let mut iter = module.tokens.iter_range(cur_range); + let mut iter = module.tokens.iter_range(token_range_begin, Some(token_range_end)); loop { let next = iter.next(); if next.is_none() { @@ -134,7 +131,7 @@ impl PassDefinitions { let start_pos = iter.last_valid_pos(); let parser_type = self.type_parser.consume_parser_type( iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, - module_scope, false, None + module_scope, false, false, None )?; let field = consume_ident_interned(source, iter, ctx)?; Ok(StructFieldDefinition{ @@ -221,7 +218,7 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); self.type_parser.consume_parser_type( iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, - module_scope, false, None + module_scope, false, false, None ) }, &mut types_section, "an embedded type", Some(&mut close_pos) @@ -261,13 +258,15 @@ impl PassDefinitions { let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) .unwrap().variant.as_definition().definition_id; self.cur_definition = definition_id; + let allow_compiler_types = module.is_compiler_file; consume_polymorphic_vars_spilled(&module.source, iter, ctx)?; // Parse function's argument list let mut parameter_section = self.variables.start_section(); consume_parameter_list( - &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id + &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, + module_scope, definition_id, allow_compiler_types )?; let parameters = parameter_section.into_vec(); @@ -276,15 +275,16 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); let parser_type = self.type_parser.consume_parser_type( iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id, - module_scope, false, None + module_scope, false, allow_compiler_types, None )?; - // Consume block and the definition's scope - let body_id = self.consume_block_statement(module, iter, ctx)?; + // Consume body + let (body_id, source) = self.consume_procedure_body(module, iter, ctx, definition_id, ProcedureKind::Function)?; let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Definition(definition_id))); // Assign everything in the preallocated AST node let function = ctx.heap[definition_id].as_procedure_mut(); + function.source = source; function.return_type = Some(parser_type); function.parameters = parameters; function.scope = scope_id; @@ -306,23 +306,27 @@ impl PassDefinitions { let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) .unwrap().variant.as_definition().definition_id; self.cur_definition = definition_id; + let allow_compiler_types = module.is_compiler_file; consume_polymorphic_vars_spilled(&module.source, iter, ctx)?; // Parse component's argument list let mut parameter_section = self.variables.start_section(); consume_parameter_list( - &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id + &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, + module_scope, definition_id, allow_compiler_types )?; let parameters = parameter_section.into_vec(); - // Consume block - let body_id = self.consume_block_statement(module, iter, ctx)?; + // Consume body + let procedure_kind = ctx.heap[definition_id].as_procedure().kind; + let (body_id, source) = self.consume_procedure_body(module, iter, ctx, definition_id, procedure_kind)?; let scope_id = ctx.heap.alloc_scope(|this| Scope::new(this, ScopeAssociation::Definition(definition_id))); // Assign everything in the AST node let component = ctx.heap[definition_id].as_procedure_mut(); debug_assert!(component.return_type.is_none()); + component.source = source; component.parameters = parameters; component.scope = scope_id; component.body = body_id; @@ -330,6 +334,70 @@ impl PassDefinitions { Ok(()) } + /// Consumes a procedure's body: either a user-defined procedure, which we + /// parse as normal, or a builtin function, where we'll make sure we expect + /// the particular builtin. + /// + /// We expect that the procedure's name is already stored in the + /// preallocated AST node. + fn consume_procedure_body( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, definition_id: DefinitionId, kind: ProcedureKind + ) -> Result<(BlockStatementId, ProcedureSource), ParseError> { + if iter.next() == Some(TokenKind::OpenCurly) && iter.peek() == Some(TokenKind::Pragma) { + // Consume the placeholder "{ #builtin }" tokens + iter.consume(); // opening curly brace + let (pragma, pragma_span) = consume_pragma(&module.source, iter)?; + if pragma != b"#builtin" { + return Err(ParseError::new_error_str_at_span( + &module.source, pragma_span, + "expected a '#builtin' pragma, or a function body" + )); + } + + if iter.next() != Some(TokenKind::CloseCurly) { + // Just to keep the compiler writers in line ;) + panic!("compiler error: when using the #builtin pragma, wrap it in curly braces"); + } + iter.consume(); + + // Retrieve module and procedure name + assert!(module.name.is_some(), "compiler error: builtin procedure body in unnamed module"); + let (_, module_name) = module.name.as_ref().unwrap(); + let module_name = module_name.as_str(); + + let definition = ctx.heap[definition_id].as_procedure(); + let procedure_name = definition.identifier.value.as_str(); + + let source = match (module_name, procedure_name) { + ("std.global", "get") => ProcedureSource::FuncGet, + ("std.global", "put") => ProcedureSource::FuncPut, + ("std.global", "fires") => ProcedureSource::FuncFires, + ("std.global", "create") => ProcedureSource::FuncCreate, + ("std.global", "length") => ProcedureSource::FuncLength, + ("std.global", "assert") => ProcedureSource::FuncAssert, + ("std.global", "print") => ProcedureSource::FuncPrint, + ("std.random", "random_u32") => ProcedureSource::CompRandomU32, + ("std.internet", "tcp_client") => ProcedureSource::CompTcpClient, + _ => panic!( + "compiler error: unknown builtin procedure '{}' in module '{}'", + procedure_name, module_name + ), + }; + + return Ok((BlockStatementId::new_invalid(), source)); + } else { + let body_id = self.consume_block_statement(module, iter, ctx)?; + let source = match kind { + ProcedureKind::Function => + ProcedureSource::FuncUserDefined, + ProcedureKind::Primitive | ProcedureKind::Composite => + ProcedureSource::CompUserDefined, + }; + + return Ok((body_id, source)) + } + } + /// Consumes a statement and returns a boolean indicating whether it was a /// block or not. fn consume_statement(&mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx) -> Result { @@ -759,10 +827,8 @@ impl PassDefinitions { if let Expression::Call(expression) = expression { // Allow both components and functions, as it makes more sense to // check their correct use in the validation and linking pass - if expression.method == Method::UserComponent || expression.method == Method::UserFunction { - call_id = expression.this; - valid = true; - } + call_id = expression.this; + valid = true; } if !valid { @@ -797,7 +863,7 @@ impl PassDefinitions { let parser_type = self.type_parser.consume_parser_type( iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id, SymbolScope::Module(module.root_id), - true, Some(angle_start_pos) + true, false, Some(angle_start_pos) )?; (parser_type.elements, parser_type.full_span.end) @@ -893,7 +959,8 @@ impl PassDefinitions { let parser_type = self.type_parser.consume_parser_type( iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, - definition_id, SymbolScope::Definition(definition_id), true, None + definition_id, SymbolScope::Definition(definition_id), + true, false, None ); if let Ok(parser_type) = parser_type { @@ -1456,11 +1523,25 @@ impl PassDefinitions { } else if next == Some(TokenKind::Integer) { let (literal, span) = consume_integer_literal(&module.source, iter, &mut self.buffer)?; + ctx.heap.alloc_literal_expression(|this| LiteralExpression { + this, + span, + value: Literal::Integer(LiteralInteger { unsigned_value: literal, negated: false }), + parent: ExpressionParent::None, + type_index: -1, + }).upcast() + } else if next == Some(TokenKind::Bytestring) { + let span = consume_bytestring_literal(&module.source, iter, &mut self.buffer)?; + let mut bytes = Vec::with_capacity(self.buffer.len()); + for byte in self.buffer.as_bytes().iter().copied() { + bytes.push(byte); + } + ctx.heap.alloc_literal_expression(|this| LiteralExpression{ this, span, - value: Literal::Integer(LiteralInteger{ unsigned_value: literal, negated: false }), + value: Literal::Bytestring(bytes), parent: ExpressionParent::None, - type_index: -1, + type_index: -1 }).upcast() } else if next == Some(TokenKind::String) { let span = consume_string_literal(&module.source, iter, &mut self.buffer)?; @@ -1500,7 +1581,7 @@ impl PassDefinitions { let poly_vars = ctx.heap[self.cur_definition].poly_vars(); let parser_type = self.type_parser.consume_parser_type( iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, self.cur_definition, - symbol_scope, true, None + symbol_scope, true, false, None )?; debug_assert!(!parser_type.elements.is_empty()); match parser_type.elements[0].variant { @@ -1579,22 +1660,21 @@ impl PassDefinitions { }, Definition::Procedure(proc_def) => { // Check whether it is a builtin function + // TODO: Once we start generating bytecode this is unnecessary let procedure_id = proc_def.this; - let method = if proc_def.builtin { - match proc_def.identifier.value.as_bytes() { - KW_FUNC_GET => Method::Get, - KW_FUNC_PUT => Method::Put, - KW_FUNC_FIRES => Method::Fires, - KW_FUNC_CREATE => Method::Create, - KW_FUNC_LENGTH => Method::Length, - KW_FUNC_ASSERT => Method::Assert, - KW_FUNC_PRINT => Method::Print, - _ => unreachable!(), - } - } else if proc_def.kind == ProcedureKind::Function { - Method::UserFunction - } else { - Method::UserComponent + let method = match proc_def.source { + ProcedureSource::FuncUserDefined => Method::UserFunction, + ProcedureSource::CompUserDefined => Method::UserComponent, + ProcedureSource::FuncGet => Method::Get, + ProcedureSource::FuncPut => Method::Put, + ProcedureSource::FuncFires => Method::Fires, + ProcedureSource::FuncCreate => Method::Create, + ProcedureSource::FuncLength => Method::Length, + ProcedureSource::FuncAssert => Method::Assert, + ProcedureSource::FuncPrint => Method::Print, + ProcedureSource::CompRandomU32 => Method::ComponentRandomU32, + ProcedureSource::CompTcpClient => Method::ComponentTcpClient, + _ => todo!("other procedure sources"), }; // Function call: consume the arguments @@ -1668,7 +1748,7 @@ impl PassDefinitions { self.type_parser.consume_parser_type( iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, definition_id, SymbolScope::Module(module.root_id), - true, Some(angle_start_pos) + true, false, Some(angle_start_pos) )? } else { // Automatic casting with inferred target type @@ -1804,7 +1884,7 @@ fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter, fn consume_parameter_list( parser: &mut ParserTypeParser, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut ScopedSection, - scope: SymbolScope, definition_id: DefinitionId + scope: SymbolScope, definition_id: DefinitionId, allow_compiler_types: bool ) -> Result<(), ParseError> { consume_comma_separated( TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx, @@ -1812,7 +1892,7 @@ fn consume_parameter_list( let poly_vars = ctx.heap[definition_id].poly_vars(); // Rust being rust, multiple lookups let parser_type = parser.consume_parser_type( iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, - scope, false, None + scope, false, allow_compiler_types, None )?; let identifier = consume_ident_interned(source, iter, ctx)?; let parameter_id = ctx.heap.alloc_variable(|this| Variable{