diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index b7f2f70ae093791e51e0fb9a5ec6555e0bf36ce4..0d15c82d1eab600fdb4e7ecd22714d90077f593d 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -132,9 +132,9 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); let start_pos = iter.last_valid_pos(); - let parser_type = consume_parser_type( - source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, - definition_id, false, 0 + let parser_type = self.type_parser.consume_parser_type( + iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, + module_scope, false, None )?; let field = consume_ident_interned(source, iter, ctx)?; Ok(StructFieldDefinition{ @@ -219,9 +219,9 @@ impl PassDefinitions { TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx, |source, iter, ctx| { let poly_vars = ctx.heap[definition_id].poly_vars(); - consume_parser_type( - source, iter, &ctx.symbols, &ctx.heap, poly_vars, - module_scope, definition_id, false, 0 + self.type_parser.consume_parser_type( + iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, + module_scope, false, None ) }, &mut types_section, "an embedded type", Some(&mut close_pos) @@ -267,7 +267,7 @@ impl PassDefinitions { // Parse function's argument list let mut parameter_section = self.variables.start_section(); consume_parameter_list( - &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id + &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id )?; let parameters = parameter_section.into_vec(); @@ -279,7 +279,10 @@ impl PassDefinitions { TokenKind::OpenCurly, &module.source, iter, ctx, |source, iter, ctx| { let poly_vars = ctx.heap[definition_id].poly_vars(); - consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false, 0) + 
self.type_parser.consume_parser_type( + iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, + module_scope, false, None + ) }, &mut return_types, "a return type", Some(&mut open_curly_pos) )?; @@ -322,7 +325,7 @@ impl PassDefinitions { // Parse component's argument list let mut parameter_section = self.variables.start_section(); consume_parameter_list( - &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id + &mut self.type_parser, &module.source, iter, ctx, &mut parameter_section, module_scope, definition_id )?; let parameters = parameter_section.into_vec(); @@ -740,13 +743,14 @@ impl PassDefinitions { // Retrieve the type of the channel, we're cheating a bit here by // consuming the first '<' and setting the initial angle depth to 1 // such that our final '>' will be consumed as well. + let angle_start_pos = iter.next_start_position(); iter.consume(); let definition_id = self.cur_definition; let poly_vars = ctx.heap[definition_id].poly_vars(); - let parser_type = consume_parser_type( - &module.source, iter, &ctx.symbols, &ctx.heap, - poly_vars, SymbolScope::Module(module.root_id), definition_id, - true, 1 + let parser_type = self.type_parser.consume_parser_type( + iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, + definition_id, SymbolScope::Module(module.root_id), + true, Some(angle_start_pos) )?; (parser_type.elements, parser_type.full_span.end) @@ -859,9 +863,9 @@ impl PassDefinitions { let definition_id = self.cur_definition; let poly_vars = ctx.heap[definition_id].poly_vars(); - let parser_type = consume_parser_type( - &module.source, iter, &ctx.symbols, &ctx.heap, poly_vars, - SymbolScope::Definition(definition_id), definition_id, true, 0 + let parser_type = self.type_parser.consume_parser_type( + iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, + definition_id, SymbolScope::Definition(definition_id), true, None ); if let Ok(parser_type) = parser_type { @@ -1393,9 +1397,9 @@ impl PassDefinitions { let 
symbol_scope = SymbolScope::Definition(self.cur_definition); let poly_vars = ctx.heap[self.cur_definition].poly_vars(); - let parser_type = consume_parser_type( - &module.source, iter, &ctx.symbols, &ctx.heap, poly_vars, symbol_scope, - self.cur_definition, true, 0 + let parser_type = self.type_parser.consume_parser_type( + iter, &ctx.heap, &module.source, &ctx.symbols, poly_vars, self.cur_definition, + symbol_scope, true, None )?; debug_assert!(!parser_type.elements.is_empty()); match parser_type.elements[0].variant { @@ -1571,13 +1575,14 @@ impl PassDefinitions { // Casting expression iter.consume(); let to_type = if Some(TokenKind::OpenAngle) == iter.next() { + let angle_start_pos = iter.next_start_position(); iter.consume(); let definition_id = self.cur_definition; let poly_vars = ctx.heap[definition_id].poly_vars(); - consume_parser_type( - &module.source, iter, &ctx.symbols, &ctx.heap, - poly_vars, SymbolScope::Module(module.root_id), definition_id, - true, 1 + self.type_parser.consume_parser_type( + iter, &ctx.heap, &module.source, &ctx.symbols, + poly_vars, definition_id, SymbolScope::Module(module.root_id), + true, Some(angle_start_pos) )? } else { // Automatic casting with inferred target type @@ -1697,456 +1702,6 @@ impl PassDefinitions { } } -/// Consumes a type. A type always starts with an identifier which may indicate -/// a builtin type or a user-defined type. The fact that it may contain -/// polymorphic arguments makes it a tree-like structure. Because we cannot rely -/// on knowing the exact number of polymorphic arguments we do not check for -/// these. -/// -/// Note that the first depth index is used as a hack. 
-// TODO: @Optimize, @Cleanup -fn consume_parser_type( - source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], - cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, first_angle_depth: i32, -) -> Result<ParserType, ParseError> { - struct Entry{ - element: ParserTypeElement, - depth: i32, - } - - // After parsing the array modifier "[]", we need to insert an array type - // before the most recently parsed type. - fn insert_array_before(elements: &mut Vec<Entry>, depth: i32, span: InputSpan) { - let index = elements.iter().rposition(|e| e.depth == depth).unwrap(); - let num_embedded = elements[index].element.variant.num_embedded(); - elements.insert(index, Entry{ - element: ParserTypeElement{ element_span: span, variant: ParserTypeVariant::Array }, - depth, - }); - - // Now the original element, and all of its children, should have their - // depth incremented by 1 - elements[index + 1].depth += 1; - if num_embedded != 0 { - for idx in index + 2..elements.len() { - let element = &mut elements[idx]; - if element.depth >= depth + 1 { - element.depth += 1; - } else { - break; - } - } - } - } - - // Most common case we just have one type, perhaps with some array - // annotations. This is both the hot-path, and simplifies the state machine - // that follows and is responsible for parsing more complicated types. 
- let element = consume_parser_type_ident( - source, iter, symbols, heap, poly_vars, cur_scope, - wrapping_definition, allow_inference - )?; - - if iter.next() != Some(TokenKind::OpenAngle) { - let num_embedded = element.variant.num_embedded(); - let first_pos = element.element_span.begin; - let mut last_pos = element.element_span.end; - let mut elements = Vec::with_capacity(num_embedded + 2); // type itself + embedded + 1 (maybe) array type - - // Consume any potential array elements - while iter.next() == Some(TokenKind::OpenSquare) { - let mut array_span = iter.next_span(); - iter.consume(); - - let end_span = iter.next_span(); - array_span.end = end_span.end; - consume_token(source, iter, TokenKind::CloseSquare)?; - - last_pos = end_span.end; - elements.push(ParserTypeElement{ element_span: array_span, variant: ParserTypeVariant::Array }); - } - - // Push the element itself - let element_span = element.element_span; - elements.push(element); - - // Check if polymorphic arguments are expected - if num_embedded != 0 { - if !allow_inference { - return Err(ParseError::new_error_str_at_span(source, element_span, "type inference is not allowed here")); - } - - for _ in 0..num_embedded { - elements.push(ParserTypeElement { element_span, variant: ParserTypeVariant::Inferred }); - } - } - - // When we have applied the initial-open-angle hack (e.g. consuming an - // explicit type on a channel), then we consume the closing angles as - // well. - for _ in 0..first_angle_depth { - let (_, angle_end_pos) = iter.next_positions(); - last_pos = angle_end_pos; - consume_token(source, iter, TokenKind::CloseAngle)?; - } - - return Ok(ParserType{ - elements, - full_span: InputSpan::from_positions(first_pos, last_pos) - }); - }; - - // We have a polymorphic specification. So we start by pushing the item onto - // our stack, then start adding entries together with the angle-brace depth - // at which they're found. 
- let mut elements = Vec::new(); - let first_pos = element.element_span.begin; - let mut last_pos = element.element_span.end; - elements.push(Entry{ element, depth: 0 }); - - // Start out with the first '<' consumed. - iter.consume(); - enum State { Ident, Open, Close, Comma } - let mut state = State::Open; - let mut angle_depth = first_angle_depth + 1; - - loop { - let next = iter.next(); - - match state { - State::Ident => { - // Just parsed an identifier, may expect comma, angled braces, - // or the tokens indicating an array - if Some(TokenKind::OpenAngle) == next { - angle_depth += 1; - state = State::Open; - } else if Some(TokenKind::CloseAngle) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - angle_depth -= 1; - state = State::Close; - } else if Some(TokenKind::ShiftRight) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - angle_depth -= 2; - state = State::Close; - } else if Some(TokenKind::Comma) == next { - state = State::Comma; - } else if Some(TokenKind::OpenSquare) == next { - let (start_pos, _) = iter.next_positions(); - iter.consume(); // consume opening square - if iter.next() != Some(TokenKind::CloseSquare) { - return Err(ParseError::new_error_str_at_pos( - source, iter.last_valid_pos(), - "unexpected token: expected ']'" - )); - } - let (_, end_pos) = iter.next_positions(); - let array_span = InputSpan::from_positions(start_pos, end_pos); - insert_array_before(&mut elements, angle_depth, array_span); - } else { - return Err(ParseError::new_error_str_at_pos( - source, iter.last_valid_pos(), - "unexpected token: expected '<', '>', ',' or '['") - ); - } - - iter.consume(); - }, - State::Open => { - // Just parsed an opening angle bracket, expecting an identifier - let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?; - elements.push(Entry{ element, depth: angle_depth }); - state = State::Ident; - 
}, - State::Close => { - // Just parsed 1 or 2 closing angle brackets, expecting comma, - // more closing brackets or the tokens indicating an array - if Some(TokenKind::Comma) == next { - state = State::Comma; - } else if Some(TokenKind::CloseAngle) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - angle_depth -= 1; - state = State::Close; - } else if Some(TokenKind::ShiftRight) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - angle_depth -= 2; - state = State::Close; - } else if Some(TokenKind::OpenSquare) == next { - let (start_pos, _) = iter.next_positions(); - iter.consume(); - if iter.next() != Some(TokenKind::CloseSquare) { - return Err(ParseError::new_error_str_at_pos( - source, iter.last_valid_pos(), - "unexpected token: expected ']'" - )); - } - let (_, end_pos) = iter.next_positions(); - let array_span = InputSpan::from_positions(start_pos, end_pos); - insert_array_before(&mut elements, angle_depth, array_span); - } else { - return Err(ParseError::new_error_str_at_pos( - source, iter.last_valid_pos(), - "unexpected token: expected ',', '>', or '['") - ); - } - - iter.consume(); - }, - State::Comma => { - // Just parsed a comma, expecting an identifier or more closing - // braces - if Some(TokenKind::Ident) == next { - let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?; - elements.push(Entry{ element, depth: angle_depth }); - state = State::Ident; - } else if Some(TokenKind::CloseAngle) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - iter.consume(); - angle_depth -= 1; - state = State::Close; - } else if Some(TokenKind::ShiftRight) == next { - let (_, end_angle_pos) = iter.next_positions(); - last_pos = end_angle_pos; - iter.consume(); - angle_depth -= 2; - state = State::Close; - } else { - return Err(ParseError::new_error_str_at_pos( - source, 
iter.last_valid_pos(), - "unexpected token: expected '>' or a type name" - )); - } - } - } - - if angle_depth < 0 { - return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "unmatched '>'")); - } else if angle_depth == 0 { - break; - } - } - - // If here then we have found the correct number of angle braces. - - // Check for trailing array identifiers - while Some(TokenKind::OpenSquare) == iter.next() { - let (array_start, _) = iter.next_positions(); - iter.consume(); - if Some(TokenKind::CloseSquare) != iter.next() { - return Err(ParseError::new_error_str_at_pos( - source, iter.last_valid_pos(), - "unexpected token: expected ']'" - )); - } - let (_, array_end) = iter.next_positions(); - iter.consume(); - insert_array_before(&mut elements, 0, InputSpan::from_positions(array_start, array_end)) - } - - // If here then we found the correct number of angle braces. But we still - // need to make sure that each encountered type has the correct number of - // embedded types. - for idx in 0..elements.len() { - let cur_element = &elements[idx]; - - let expected_subtypes = cur_element.element.variant.num_embedded(); - let mut encountered_subtypes = 0; - for peek_idx in idx + 1..elements.len() { - let peek_element = &elements[peek_idx]; - if peek_element.depth == cur_element.depth + 1 { - encountered_subtypes += 1; - } else if peek_element.depth <= cur_element.depth { - break; - } - } - - if expected_subtypes != encountered_subtypes { - if encountered_subtypes == 0 { - // Case where we have elided the embedded types, all of them - // should be inferred. - if !allow_inference { - return Err(ParseError::new_error_str_at_span( - source, cur_element.element.element_span, - "type inference is not allowed here" - )); - } - - // Insert the missing types (in reverse order, but they're all - // of the "inferred" type anyway). 
- let inserted_span = cur_element.element.element_span; - let inserted_depth = cur_element.depth + 1; - elements.reserve(expected_subtypes); - for _ in 0..expected_subtypes { - elements.insert(idx + 1, Entry{ - element: ParserTypeElement{ element_span: inserted_span, variant: ParserTypeVariant::Inferred }, - depth: inserted_depth, - }); - } - } else { - // Mismatch in number of embedded types, produce a neat error - // message. - let type_name = String::from_utf8_lossy(source.section_at_span(cur_element.element.element_span)); - fn polymorphic_name_text(num: usize) -> &'static str { - if num == 1 { "polymorphic argument" } else { "polymorphic arguments" } - } - fn were_or_was(num: usize) -> &'static str { - if num == 1 { "was" } else { "were" } - } - - if expected_subtypes == 0 { - return Err(ParseError::new_error_at_span( - source, cur_element.element.element_span, - format!( - "the type '{}' is not polymorphic, yet {} {} {} provided", - type_name, encountered_subtypes, polymorphic_name_text(encountered_subtypes), - were_or_was(encountered_subtypes) - ) - )); - } - - let maybe_infer_text = if allow_inference { - " (or none, to perform implicit type inference)" - } else { - "" - }; - - return Err(ParseError::new_error_at_span( - source, cur_element.element.element_span, - format!( - "expected {} {}{} for the type '{}', but {} {} provided", - expected_subtypes, polymorphic_name_text(expected_subtypes), - maybe_infer_text, type_name, encountered_subtypes, - were_or_was(encountered_subtypes) - ) - )); - } - } - } - - let mut constructed_elements = Vec::with_capacity(elements.len()); - for element in elements.into_iter() { - constructed_elements.push(element.element); - } - - Ok(ParserType{ - elements: constructed_elements, - full_span: InputSpan::from_positions(first_pos, last_pos) - }) -} - -/// Consumes an identifier for which we assume that it resolves to some kind of -/// type. Once we actually arrive at a type we will stop parsing. 
Hence there -/// may be trailing '::' tokens in the iterator, or the subsequent specification -/// of polymorphic arguments. -fn consume_parser_type_ident( - source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], - mut scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, -) -> Result<ParserTypeElement, ParseError> { - use ParserTypeVariant as PTV; - let (mut type_text, mut type_span) = consume_any_ident(source, iter)?; - - let variant = match type_text { - KW_TYPE_MESSAGE => PTV::Message, - KW_TYPE_BOOL => PTV::Bool, - KW_TYPE_UINT8 => PTV::UInt8, - KW_TYPE_UINT16 => PTV::UInt16, - KW_TYPE_UINT32 => PTV::UInt32, - KW_TYPE_UINT64 => PTV::UInt64, - KW_TYPE_SINT8 => PTV::SInt8, - KW_TYPE_SINT16 => PTV::SInt16, - KW_TYPE_SINT32 => PTV::SInt32, - KW_TYPE_SINT64 => PTV::SInt64, - KW_TYPE_IN_PORT => PTV::Input, - KW_TYPE_OUT_PORT => PTV::Output, - KW_TYPE_CHAR => PTV::Character, - KW_TYPE_STRING => PTV::String, - KW_TYPE_INFERRED => { - if !allow_inference { - return Err(ParseError::new_error_str_at_span(source, type_span, "type inference is not allowed here")); - } - - PTV::Inferred - }, - _ => { - // Must be some kind of symbolic type - let mut type_kind = None; - for (poly_idx, poly_var) in poly_vars.iter().enumerate() { - if poly_var.value.as_bytes() == type_text { - type_kind = Some(PTV::PolymorphicArgument(wrapping_definition, poly_idx as u32)); - } - } - - if type_kind.is_none() { - // Check symbol table for definition. To be fair, the language - // only allows a single namespace for now. 
That said: - let last_symbol = symbols.get_symbol_by_name(scope, type_text); - if last_symbol.is_none() { - return Err(ParseError::new_error_str_at_span(source, type_span, "unknown type")); - } - let mut last_symbol = last_symbol.unwrap(); - - loop { - match &last_symbol.variant { - SymbolVariant::Module(symbol_module) => { - // Expecting more identifiers - if Some(TokenKind::ColonColon) != iter.next() { - return Err(ParseError::new_error_str_at_span(source, type_span, "expected a type but got a module")); - } - - consume_token(source, iter, TokenKind::ColonColon)?; - - // Consume next part of type and prepare for next - // lookup loop - let (next_text, next_span) = consume_any_ident(source, iter)?; - let old_text = type_text; - type_text = next_text; - type_span.end = next_span.end; - scope = SymbolScope::Module(symbol_module.root_id); - - let new_symbol = symbols.get_symbol_by_name_defined_in_scope(scope, type_text); - if new_symbol.is_none() { - // If the type is imported in the module then notify the programmer - // that imports do not leak outside of a module - let type_name = String::from_utf8_lossy(type_text); - let module_name = String::from_utf8_lossy(old_text); - let suffix = if symbols.get_symbol_by_name(scope, type_text).is_some() { - format!( - ". 
The module '{}' does import '{}', but these imports are not visible to other modules", - &module_name, &type_name - ) - } else { - String::new() - }; - - return Err(ParseError::new_error_at_span( - source, next_span, - format!("unknown type '{}' in module '{}'{}", type_name, module_name, suffix) - )); - } - - last_symbol = new_symbol.unwrap(); - }, - SymbolVariant::Definition(symbol_definition) => { - let num_poly_vars = heap[symbol_definition.definition_id].poly_vars().len(); - type_kind = Some(PTV::Definition(symbol_definition.definition_id, num_poly_vars as u32)); - break; - } - } - } - } - - debug_assert!(type_kind.is_some()); - type_kind.unwrap() - }, - }; - - Ok(ParserTypeElement{ element_span: type_span, variant }) -} - /// Consumes polymorphic variables and throws them on the floor. fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter, _ctx: &mut PassCtx) -> Result<(), ParseError> { maybe_consume_comma_separated_spilled( @@ -2161,16 +1716,17 @@ fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter, /// Consumes the parameter list to functions/components fn consume_parameter_list( - source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, - target: &mut ScopedSection<VariableId>, scope: SymbolScope, definition_id: DefinitionId + parser: &mut ParserTypeParser, source: &InputSource, iter: &mut TokenIter, + ctx: &mut PassCtx, target: &mut ScopedSection<VariableId>, + scope: SymbolScope, definition_id: DefinitionId ) -> Result<(), ParseError> { consume_comma_separated( TokenKind::OpenParen, TokenKind::CloseParen, source, iter, ctx, |source, iter, ctx| { let poly_vars = ctx.heap[definition_id].poly_vars(); // Rust being rust, multiple lookups - let parser_type = consume_parser_type( - source, iter, &ctx.symbols, &ctx.heap, poly_vars, scope, - definition_id, false, 0 + let parser_type = parser.consume_parser_type( + iter, &ctx.heap, source, &ctx.symbols, poly_vars, definition_id, + scope, false, None )?; let identifier = 
consume_ident_interned(source, iter, ctx)?; let parameter_id = ctx.heap.alloc_variable(|this| Variable{