diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index fd5f3dafd7546664dd5a6f7bb2531f94e57c351f..d352dabae9db5c6a11858ad0c241bcecf69d87c2 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -629,9 +629,10 @@ impl PartialEq for Identifier { } } -impl PartialEq for Identifier { - fn eq(&self, other: &NamespacedIdentifier) -> bool { - return self.value == other.value +impl Display for Identifier { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // A source identifier is in ASCII range. + write!(f, "{}", String::from_utf8_lossy(&self.value)) } } @@ -677,14 +678,14 @@ impl NamespacedIdentifierPart { /// set of polymorphic arguments at the appropriate position. /// TODO: @tokens Reimplement/rename once we have a tokenizer #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct NamespacedIdentifier2 { +pub struct NamespacedIdentifier { pub position: InputPosition, pub value: Vec, // Full name as it resides in the input source pub poly_args: Vec, // All poly args littered throughout the namespaced identifier pub parts: Vec, // Indices into value/poly_args } -impl NamespacedIdentifier2 { +impl NamespacedIdentifier { /// Returns the identifier value without any of the specific polymorphic /// arguments. pub fn strip_poly_args(&self) -> Vec { @@ -705,8 +706,8 @@ impl NamespacedIdentifier2 { } /// Returns an iterator of the elements in the namespaced identifier - pub fn iter(&self) -> NamespacedIdentifier2Iter { - return NamespacedIdentifier2Iter{ + pub fn iter(&self) -> NamespacedIdentifierIter { + return NamespacedIdentifierIter{ identifier: self, element_idx: 0 } @@ -766,12 +767,12 @@ impl NamespacedIdentifier2 { /// Iterator over elements of the namespaced identifier. The element index will /// only ever be at the start of an identifier element. #[derive(Debug)] -pub struct NamespacedIdentifier2Iter<'a> { - identifier: &'a NamespacedIdentifier2, +pub struct NamespacedIdentifierIter<'a> { + identifier: &'a NamespacedIdentifier, element_idx: usize, } -impl<'a> Iterator for NamespacedIdentifier2Iter<'a> { +impl<'a> Iterator for NamespacedIdentifierIter<'a> { type Item = (&'a [u8], Option<&'a [ParserTypeId]>); fn next(&mut self) -> Option { match self.get(self.element_idx) { @@ -787,7 +788,7 @@ impl<'a> Iterator for NamespacedIdentifier2Iter<'a> { } } -impl<'a> NamespacedIdentifier2Iter<'a> { +impl<'a> NamespacedIdentifierIter<'a> { /// Returns number of parts iterated over, may not correspond to number of /// times one called `next()` because returning an identifier with /// polymorphic arguments increments the internal counter by 2. @@ -862,95 +863,6 @@ impl<'a> NamespacedIdentifier2Iter<'a> { } } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct NamespacedIdentifier { - pub position: InputPosition, - pub num_namespaces: u8, - pub value: Vec, -} - -impl NamespacedIdentifier { - pub(crate) fn iter(&self) -> NamespacedIdentifierIter { - NamespacedIdentifierIter{ - value: &self.value, - cur_offset: 0, - num_returned: 0, - num_total: self.num_namespaces - } - } -} - -impl PartialEq for NamespacedIdentifier { - fn eq(&self, other: &Self) -> bool { - return self.value == other.value - } -} - -impl PartialEq for NamespacedIdentifier { - fn eq(&self, other: &Identifier) -> bool { - return self.value == other.value; - } -} - -// TODO: Just keep ref to NamespacedIdentifier -pub(crate) struct NamespacedIdentifierIter<'a> { - value: &'a Vec, - cur_offset: usize, - num_returned: u8, - num_total: u8, -} - -impl<'a> NamespacedIdentifierIter<'a> { - pub(crate) fn num_returned(&self) -> u8 { - return self.num_returned; - } - pub(crate) fn num_remaining(&self) -> u8 { - return self.num_total - self.num_returned - } - pub(crate) fn returned_section(&self) -> &[u8] { - // Offset always includes the two trailing ':' characters - let end = if self.cur_offset >= 2 { self.cur_offset - 2 } else { self.cur_offset }; - return &self.value[..end] - } -} - -impl<'a> Iterator for NamespacedIdentifierIter<'a> { - type Item = &'a [u8]; - fn next(&mut self) -> Option { - if self.cur_offset >= self.value.len() { - debug_assert_eq!(self.num_returned, self.num_total); - None - } else { - debug_assert!(self.num_returned < self.num_total); - let start = self.cur_offset; - let mut end = start; - while end < self.value.len() - 1 { - if self.value[end] == b':' && self.value[end + 1] == b':' { - self.cur_offset = end + 2; - self.num_returned += 1; - return Some(&self.value[start..end]); - } - end += 1; - } - - // If NamespacedIdentifier is constructed properly, then we cannot - // end with "::" in the value, so - debug_assert!(end == 0 || (self.value[end - 1] != b':' && self.value[end] != b':')); - debug_assert_eq!(self.num_returned + 1, self.num_total); - self.cur_offset = self.value.len(); - self.num_returned += 1; - return Some(&self.value[start..]); - } - } -} - -impl Display for Identifier { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // A source identifier is in ASCII range. - write!(f, "{}", String::from_utf8_lossy(&self.value)) - } -} - /// TODO: @types Remove the Message -> Byte hack at some point... #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum ParserTypeVariant { @@ -1001,7 +913,7 @@ pub struct ParserType { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct SymbolicParserType { // Phase 1: parser - pub identifier: NamespacedIdentifier2, + pub identifier: NamespacedIdentifier, // Phase 2: validation/linking (for types in function/component bodies) and // type table construction (for embedded types of structs/unions) pub poly_args2: Vec, // taken from identifier or inferred @@ -1078,24 +990,7 @@ pub enum PrimitiveType { Short, Int, Long, - Symbolic(PrimitiveSymbolic) -} - -// TODO: @cleanup, remove PartialEq implementations -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct PrimitiveSymbolic { - // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, // TODO: @remove at some point, also remove NSIdent itself - // Phase 2: typing - pub(crate) definition: Option -} - -impl PartialEq for PrimitiveSymbolic { - fn eq(&self, other: &Self) -> bool { - self.identifier == other.identifier - } } -impl Eq for PrimitiveSymbolic{} #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct Type { @@ -1156,21 +1051,6 @@ impl Display for Type { PrimitiveType::Long => { write!(f, "long")?; } - PrimitiveType::Symbolic(data) => { - // Type data is in ASCII range. - if let Some(id) = &data.definition { - write!( - f, "Symbolic({}, id: {})", - String::from_utf8_lossy(&data.identifier.value), - id.index - )?; - } else { - write!( - f, "Symbolic({}, id: Unresolved)", - String::from_utf8_lossy(&data.identifier.value) - )?; - } - } } if self.array { write!(f, "[]") @@ -1223,7 +1103,7 @@ pub struct LiteralStructField { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralStruct { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier2, + pub(crate) identifier: NamespacedIdentifier, pub(crate) fields: Vec, // Phase 2: linker pub(crate) poly_args2: Vec, // taken from identifier @@ -1233,9 +1113,9 @@ pub struct LiteralStruct { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralEnum { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier2, - pub(crate) poly_args: Vec, + pub(crate) identifier: NamespacedIdentifier, // Phase 2: linker + pub(crate) poly_args2: Vec, pub(crate) definition: Option, pub(crate) variant_idx: usize, } @@ -1251,7 +1131,7 @@ pub enum Method { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct MethodSymbolic { - pub(crate) identifier: NamespacedIdentifier2, + pub(crate) identifier: NamespacedIdentifier, pub(crate) definition: Option } @@ -2687,7 +2567,7 @@ pub struct VariableExpression { pub this: VariableExpressionId, // Phase 1: parser pub position: InputPosition, - pub identifier: NamespacedIdentifier2, + pub identifier: NamespacedIdentifier, // Phase 2: linker pub declaration: Option, pub parent: ExpressionParent, diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 19b3a7e0b137460a9d2d9a1c21889569a5deb857..2d6b024a6d704ae7f38caf46712a5b9b72cc2835 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -94,10 +94,10 @@ fn lowercase(x: u8) -> u8 { } } -fn identifier_as_namespaced(identifier: Identifier) -> NamespacedIdentifier2 { +fn identifier_as_namespaced(identifier: Identifier) -> NamespacedIdentifier { let identifier_len = identifier.value.len(); debug_assert!(identifier_len < u16::max_value() as usize); - NamespacedIdentifier2{ + NamespacedIdentifier{ position: identifier.position, value: identifier.value, poly_args: Vec::new(), @@ -441,50 +441,8 @@ impl Lexer<'_> { self.consume_ident()?; Ok(()) } - fn has_namespaced_identifier(&self) -> bool { - self.has_identifier() - } - fn consume_namespaced_identifier(&mut self) -> Result { - if self.has_reserved() { - return Err(self.error_at_pos("Encountered reserved keyword")); - } - - let position = self.source.pos(); - let mut ns_ident = self.consume_ident()?; - let mut num_namespaces = 1; - while self.has_string(b"::") { - self.consume_string(b"::")?; - if num_namespaces >= MAX_NAMESPACES { - return Err(self.error_at_pos("Too many namespaces in identifier")); - } - let new_ident = self.consume_ident()?; - ns_ident.extend(b"::"); - ns_ident.extend(new_ident); - num_namespaces += 1; - } - - Ok(NamespacedIdentifier{ - position, - value: ns_ident, - num_namespaces, - }) - } - fn consume_namespaced_identifier_spilled(&mut self) -> Result<(), ParseError2> { - // TODO: @performance - if self.has_reserved() { - return Err(self.error_at_pos("Encountered reserved keyword")); - } - self.consume_ident()?; - while self.has_string(b"::") { - self.consume_string(b"::")?; - self.consume_ident()?; - } - - Ok(()) - } - - fn consume_namespaced_identifier2(&mut self, h: &mut Heap) -> Result { + fn consume_namespaced_identifier(&mut self, h: &mut Heap) -> Result { if self.has_reserved() { return Err(self.error_at_pos("Encountered reserved keyword")); } @@ -496,7 +454,7 @@ impl Lexer<'_> { // identifier and are instead dealing with a less-than operator. Ugly? // Yes. Needs tokenizer? Yes. fn consume_part( - l: &mut Lexer, h: &mut Heap, ident: &mut NamespacedIdentifier2, + l: &mut Lexer, h: &mut Heap, ident: &mut NamespacedIdentifier, backup_pos: &mut InputPosition ) -> Result<(), ParseError2> { // Consume identifier @@ -531,7 +489,7 @@ impl Lexer<'_> { Ok(()) } - let mut ident = NamespacedIdentifier2{ + let mut ident = NamespacedIdentifier{ position: self.source.pos(), value: Vec::new(), poly_args: Vec::new(), @@ -556,6 +514,39 @@ impl Lexer<'_> { Ok(ident) } + fn consume_namespaced_identifier_spilled(&mut self) -> Result<(), ParseError2> { + if self.has_reserved() { + return Err(self.error_at_pos("Encountered reserved keyword")); + } + + debug_log!("consume_nsident2_spilled: {}", debug_line!(self.source)); + + fn consume_part_spilled(l: &mut Lexer, backup_pos: &mut InputPosition) -> Result<(), ParseError2> { + l.consume_ident()?; + *backup_pos = l.source.pos(); + l.consume_whitespace(false)?; + match l.maybe_consume_poly_args_spilled_without_pos_recovery() { + Ok(true) => { *backup_pos = l.source.pos(); }, + Ok(false) => {}, + Err(_) => { return Err(l.error_at_pos("Failed to parse poly args (spilled)")) }, + } + Ok(()) + } + + let mut backup_pos = self.source.pos(); + consume_part_spilled(self, &mut backup_pos)?; + self.consume_whitespace(false)?; + while self.has_string(b"::") { + self.consume_string(b"::")?; + self.consume_whitespace(false)?; + consume_part_spilled(self, &mut backup_pos)?; + self.consume_whitespace(false)?; + } + + self.source.seek(backup_pos); + Ok(()) + } + // Types and type annotations /// Consumes a type definition. When called the input position should be at @@ -649,7 +640,7 @@ impl Lexer<'_> { ParserTypeVariant::Output(poly_arg) } else { // Must be a symbolic type - let identifier = self.consume_namespaced_identifier2(h)?; + let identifier = self.consume_namespaced_identifier(h)?; ParserTypeVariant::Symbolic(SymbolicParserType{identifier, variant: None, poly_args2: Vec::new()}) }; @@ -710,18 +701,13 @@ impl Lexer<'_> { if self.has_type_keyword() { self.consume_any_chars(); } else { - let ident = self.consume_namespaced_identifier(); + let ident = self.consume_namespaced_identifier_spilled(); if ident.is_err() { return false; } } // Consume any polymorphic arguments that follow the type identifier let mut backup_pos = self.source.pos(); if self.consume_whitespace(false).is_err() { return false; } - match self.maybe_consume_poly_args_spilled_without_pos_recovery() { - Ok(true) => backup_pos = self.source.pos(), - Ok(false) => {}, - Err(()) => return false - } // Consume any array specifiers. Make sure we always leave the input // position at the end of the last array specifier if we do find a @@ -1535,8 +1521,6 @@ impl Lexer<'_> { // a struct literal. let backup_pos = self.source.pos(); let result = self.consume_namespaced_identifier_spilled().is_ok() && - self.consume_whitespace(false).is_ok() && - self.maybe_consume_poly_args_spilled_without_pos_recovery().is_ok() && self.consume_whitespace(false).is_ok() && self.source.next() == Some(b'{'); @@ -1548,7 +1532,7 @@ impl Lexer<'_> { // Consume identifier and polymorphic arguments debug_log!("consume_struct_literal_expression: {}", debug_line!(self.source)); let position = self.source.pos(); - let identifier = self.consume_namespaced_identifier2(h)?; + let identifier = self.consume_namespaced_identifier(h)?; self.consume_whitespace(false)?; // Consume fields @@ -1596,8 +1580,6 @@ impl Lexer<'_> { let mut result = false; if self.consume_namespaced_identifier_spilled().is_ok() && - self.consume_whitespace(false).is_ok() && - self.maybe_consume_poly_args_spilled_without_pos_recovery().is_ok() && self.consume_whitespace(false).is_ok() && self.source.next() == Some(b'(') { // Seems like we have a function call or an enum literal @@ -1628,7 +1610,7 @@ impl Lexer<'_> { self.consume_keyword(b"create")?; method = Method::Create; } else { - let identifier = self.consume_namespaced_identifier2(h)?; + let identifier = self.consume_namespaced_identifier(h)?; method = Method::Symbolic(MethodSymbolic{ identifier, definition: None diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index 89c19f709e402670704f3f77d4971d18d8585723..48ae5e34e8c3ccb0ea825c196954a2b98bf8593b 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -22,7 +22,7 @@ impl SymbolKey { Self{ module_id, symbol_name: symbol.value.clone() } } - fn from_namespaced_identifier(module_id: RootId, symbol: &NamespacedIdentifier2) -> Self { + fn from_namespaced_identifier(module_id: RootId, symbol: &NamespacedIdentifier) -> Self { Self{ module_id, symbol_name: symbol.strip_poly_args() } } } @@ -335,8 +335,8 @@ impl SymbolTable { /// The returned iterator will always point to the next symbol (even if /// nothing was found) pub(crate) fn resolve_namespaced_identifier<'t, 'i>( - &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier2 - ) -> (Option<&'t SymbolValue>, NamespacedIdentifier2Iter<'i>) { + &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier + ) -> (Option<&'t SymbolValue>, NamespacedIdentifierIter<'i>) { let mut iter = identifier.iter(); let mut symbol: Option<&SymbolValue> = None; let mut within_module_id = root_module_id; diff --git a/src/protocol/parser/utils.rs b/src/protocol/parser/utils.rs index 4122a16a636f5ec10a4c99f5a25a6d94b8153f51..4dc068027128cdf242a7f28e46714adcb0b94dae 100644 --- a/src/protocol/parser/utils.rs +++ b/src/protocol/parser/utils.rs @@ -6,11 +6,11 @@ use super::type_table::*; /// Utility result type. pub(crate) enum FindTypeResult<'t, 'i> { // Found the type exactly - Found((&'t DefinedType, NamespacedIdentifier2Iter<'i>)), + Found((&'t DefinedType, NamespacedIdentifierIter<'i>)), // Could not match symbol SymbolNotFound{ident_pos: InputPosition}, // Matched part of the namespaced identifier, but not completely - SymbolPartial{ident_pos: InputPosition, ident_iter: NamespacedIdentifier2Iter<'i>}, + SymbolPartial{ident_pos: InputPosition, ident_iter: NamespacedIdentifierIter<'i>}, // Symbol matched, but points to a namespace/module instead of a type SymbolNamespace{ident_pos: InputPosition, symbol_pos: InputPosition}, } @@ -20,7 +20,7 @@ impl<'t, 'i> FindTypeResult<'t, 'i> { /// Utility function to transform the `FindTypeResult` into a `Result` where /// `Ok` contains the resolved type, and `Err` contains a `ParseError` which /// can be readily returned. This is the most common use. - pub(crate) fn as_parse_error(self, module_source: &InputSource) -> Result<(&'t DefinedType, NamespacedIdentifier2Iter<'i>), ParseError2> { + pub(crate) fn as_parse_error(self, module_source: &InputSource) -> Result<(&'t DefinedType, NamespacedIdentifierIter<'i>), ParseError2> { match self { FindTypeResult::Found(defined_type) => Ok(defined_type), FindTypeResult::SymbolNotFound{ident_pos} => { @@ -56,7 +56,7 @@ impl<'t, 'i> FindTypeResult<'t, 'i> { /// must be a type, not a namespace. pub(crate) fn find_type_definition<'t, 'i>( symbols: &SymbolTable, types: &'t TypeTable, - root_id: RootId, identifier: &'i NamespacedIdentifier2 + root_id: RootId, identifier: &'i NamespacedIdentifier ) -> FindTypeResult<'t, 'i> { // Lookup symbol let (symbol, ident_iter) = symbols.resolve_namespaced_identifier(root_id, identifier); @@ -94,7 +94,7 @@ pub(crate) enum MatchPolymorphResult<'t> { Matching, InferAll(usize), Mismatch{defined_type: &'t DefinedType, ident_position: InputPosition, num_specified: usize}, - NoneExpected{defined_type: &'t DefinedType, ident_position: InputPosition, num_specified: usize}, + NoneExpected{defined_type: &'t DefinedType, ident_position: InputPosition}, } impl<'t> MatchPolymorphResult<'t> { @@ -148,7 +148,7 @@ pub(crate) fn match_polymorphic_args_to_vars<'t>( return MatchPolymorphResult::NoneExpected{ defined_type, ident_position, - num_specified: poly_args.unwrap().len()}; + }; } } else { // Polymorphic variables on type diff --git a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs index e963d585b40e49ce40afb1ba59440cb4ac24a38b..841fea37e5825438c1601f9501ad24e6c91cda68 100644 --- a/src/protocol/parser/visitor_linker.rs +++ b/src/protocol/parser/visitor_linker.rs @@ -1239,7 +1239,7 @@ impl ValidityAndLinkerVisitor { /// Finds a variable in the visitor's scope that must appear before the /// specified relative position within that block. - fn find_variable(&self, ctx: &Ctx, mut relative_pos: u32, identifier: &NamespacedIdentifier2) -> Result { + fn find_variable(&self, ctx: &Ctx, mut relative_pos: u32, identifier: &NamespacedIdentifier) -> Result { debug_assert!(self.cur_scope.is_some()); debug_assert!(identifier.parts.len() == 1, "implement namespaced seeking of target associated with identifier"); @@ -1373,7 +1373,7 @@ impl ValidityAndLinkerVisitor { // borrowing errors fn find_symbol_of_type<'a>( &self, source: &InputSource, root_id: RootId, symbols: &SymbolTable, types: &'a TypeTable, - identifier: &NamespacedIdentifier2, expected_type_class: TypeClass + identifier: &NamespacedIdentifier, expected_type_class: TypeClass ) -> Result<&'a DefinedType, ParseError2> { // Find symbol associated with identifier let (find_result, _) = find_type_definition(symbols, types, root_id, identifier)