diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index ef3c034496a8bbbe65a43dbf1993211cfa51d449..63c154ac40a2ef0ede62bd9f1f2b0557a4d7c9cc 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -587,7 +587,7 @@ pub struct ImportModule { // Phase 1: parser pub position: InputPosition, pub module_name: Vec, - pub alias: Vec, + pub alias: Identifier, // Phase 2: module resolving pub module_id: Option, } @@ -596,8 +596,8 @@ pub struct ImportModule { pub struct AliasedSymbol { // Phase 1: parser pub position: InputPosition, - pub name: Vec, - pub alias: Vec, + pub name: Identifier, + pub alias: Identifier, // Phase 2: symbol resolving pub definition_id: Option, } @@ -644,14 +644,14 @@ pub enum NamespacedIdentifierPart { } impl NamespacedIdentifierPart { - fn is_identifier(&self) -> bool { + pub(crate) fn is_identifier(&self) -> bool { match self { NamespacedIdentifierPart::Identifier{..} => true, NamespacedIdentifierPart::PolyArgs{..} => false, } } - fn as_identifier(&self) -> (u16, u16) { + pub(crate) fn as_identifier(&self) -> (u16, u16) { match self { NamespacedIdentifierPart::Identifier{start, end} => (*start, *end), NamespacedIdentifierPart::PolyArgs{..} => { @@ -660,7 +660,7 @@ impl NamespacedIdentifierPart { } } - fn as_poly_args(&self) -> (u16, u16) { + pub(crate) fn as_poly_args(&self) -> (u16, u16) { match self { NamespacedIdentifierPart::PolyArgs{start, end} => (*start, *end), NamespacedIdentifierPart::Identifier{..} => { @@ -670,7 +670,11 @@ impl NamespacedIdentifierPart { } } -/// An identifier with optional namespaces and polymorphic variables +/// An identifier with optional namespaces and polymorphic variables. Note that +/// we allow each identifier to be followed by polymorphic arguments during the +/// parsing phase (e.g. `Foo<A>::Bar<B>::Qux<C>`). But in our current language +/// implementation a valid namespaced identifier can only contain one set of +/// polymorphic arguments, placed at the appropriate position. 
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct NamespacedIdentifier2 { pub position: InputPosition, @@ -686,6 +690,10 @@ impl NamespacedIdentifier2 { element_idx: 0 } } + + pub fn has_poly_args(&self) -> bool { + return !self.poly_args.is_empty(); + } } impl PartialEq for NamespacedIdentifier2 { diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 9eef0fdef53f2aa0a6e767c29e3ac84fd9d2a4ed..9a8f3a73c3d40fa8007aae4686e1831ca7762635 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2394,6 +2394,7 @@ impl Lexer<'_> { self.consume_keyword(b"import")?; self.consume_whitespace(true)?; let mut value = Vec::new(); + let mut last_ident_pos = self.source.pos(); let mut ident = self.consume_ident()?; value.append(&mut ident); let mut last_ident_start = 0; @@ -2401,6 +2402,7 @@ impl Lexer<'_> { while self.has_string(b".") { self.consume_string(b".")?; value.push(b'.'); + last_ident_pos = self.source.pos(); ident = self.consume_ident()?; last_ident_start = value.len(); value.append(&mut ident); @@ -2413,7 +2415,7 @@ impl Lexer<'_> { let import = if self.has_string(b"as") { self.consume_string(b"as")?; self.consume_whitespace(true)?; - let alias = self.consume_ident()?; + let alias = self.consume_identifier()?; h.alloc_import(|this| Import::Module(ImportModule{ this, @@ -2433,7 +2435,7 @@ impl Lexer<'_> { |lexer, _heap| { // Symbol name let position = lexer.source.pos(); - let name = lexer.consume_ident()?; + let name = lexer.consume_identifier()?; lexer.consume_whitespace(false)?; // Symbol alias @@ -2441,7 +2443,7 @@ impl Lexer<'_> { // With alias lexer.consume_string(b"as")?; lexer.consume_whitespace(true)?; - let alias = lexer.consume_ident()?; + let alias = lexer.consume_identifier()?; Ok(AliasedSymbol{ position, @@ -2482,12 +2484,12 @@ impl Lexer<'_> { })) } else if self.has_identifier() { let position = self.source.pos(); - let name = self.consume_ident()?; + let name = self.consume_identifier()?; 
self.consume_whitespace(false)?; let alias = if self.has_string(b"as") { self.consume_string(b"as")?; self.consume_whitespace(true)?; - self.consume_ident()? + self.consume_identifier()? } else { name.clone() }; @@ -2509,12 +2511,15 @@ impl Lexer<'_> { } } else { // No explicit alias or subimports, so implicit alias - let alias = Vec::from(&value[last_ident_start..]); + let alias_value = Vec::from(&value[last_ident_start..]); h.alloc_import(|this| Import::Module(ImportModule{ this, position, module_name: value, - alias, + alias: Identifier{ + position: last_ident_pos, + value: alias_value, + }, module_id: None, })) }; diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index b6054bef4c0b56f3ab45e32f1f942a55d16974b8..b310f039839b573bd8e08430c6f8a84dd9dc1ce4 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -17,6 +17,29 @@ struct SymbolKey { symbol_name: Vec, } +impl SymbolKey { + fn from_identifier(module_id: RootId, symbol: &Identifier) -> Self { + Self{ module_id, symbol_name: symbol.value.clone() } + } + + fn from_namespaced_identifier(module_id: RootId, symbol: &NamespacedIdentifier2) -> Self { + // Key on the identifier parts only: polymorphic arguments are stripped + let mut symbol_name = Vec::with_capacity(symbol.value.len()); + debug_assert!(symbol.parts.len() > 0 && symbol.parts[0].is_identifier()); + + let mut iter = symbol.iter(); + let (first_ident, _) = iter.next().unwrap(); + symbol_name.extend(first_ident); + + for (ident, _) in iter { + symbol_name.push(b':'); + symbol_name.extend(ident); + } + + Self{ module_id, symbol_name } + } +} + pub(crate) enum Symbol { Namespace(RootId), Definition((RootId, DefinitionId)), @@ -144,7 +167,7 @@ impl SymbolTable { let definition = &heap[*definition_id]; let identifier = definition.identifier(); if let Err(previous_position) = self.add_definition_symbol( - module.root_id, identifier.position, &identifier.value, + identifier.position, 
SymbolKey::from_identifier(module.root_id, &identifier), module.root_id, *definition_id ) { return Err( @@ -175,8 +198,8 @@ impl SymbolTable { // Add the target module under its alias if let Err(previous_position) = self.add_namespace_symbol( - module.root_id, import.position, - &import.alias, target_root_id + import.position, SymbolKey::from_identifier(module.root_id, &import.alias), + target_root_id ) { return Err( ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined") @@ -202,7 +225,7 @@ impl SymbolTable { let definition = &heap[*definition_id]; let identifier = definition.identifier(); if let Err(previous_position) = self.add_definition_symbol( - module.root_id, import.position, &identifier.value, + import.position, SymbolKey::from_identifier(module.root_id, identifier), target_root_id, *definition_id ) { return Err( @@ -232,7 +255,8 @@ impl SymbolTable { // to "import a module's imported symbol". And so if we do find // a symbol match, we need to make sure it is a definition from // within that module by checking `source_root_id == target_root_id` - let target_symbol = self.resolve_symbol(target_root_id, &symbol.name); + let key = SymbolKey::from_identifier(target_root_id, &symbol.name); + let target_symbol = self.symbol_lookup.get(&key); let symbol_definition_id = match target_symbol { Some(target_symbol) => { match target_symbol.symbol { @@ -262,7 +286,7 @@ impl SymbolTable { let symbol_definition_id = symbol_definition_id.unwrap(); if let Err(previous_position) = self.add_definition_symbol( - module.root_id, symbol.position, &symbol.alias, + symbol.position, SymbolKey::from_identifier(module.root_id, &symbol.alias), target_root_id, symbol_definition_id ) { return Err( @@ -302,20 +326,16 @@ impl SymbolTable { self.module_lookup.get(identifier).map(|v| *v) } - /// Resolves a symbol within a particular module, indicated by its RootId, - /// with a single non-namespaced identifier - pub(crate) fn resolve_symbol(&self, 
within_module_id: RootId, identifier: &Vec) -> Option<&SymbolValue> { - self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() }) - } - /// Resolves a namespaced symbol. This method will go as far as possible in /// going to the right symbol. It will halt the search when: /// 1. Polymorphic arguments are encountered on the identifier. /// 2. A non-namespace symbol is encountered. /// 3. A part of the identifier couldn't be resolved to anything + /// The returned iterator will always point to the next symbol (even if + /// nothing was found) pub(crate) fn resolve_namespaced_symbol<'t, 'i>( &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier2 - ) -> (Option<&'t Symbol>, &'i NamespacedIdentifier2Iter) { + ) -> (Option<&'t SymbolValue>, NamespacedIdentifier2Iter<'i>) { let mut iter = identifier.iter(); let mut symbol: Option<&SymbolValue> = None; let mut within_module_id = root_module_id; @@ -328,6 +348,7 @@ impl SymbolTable { match new_symbol { None => { // Can't find anything + symbol = None; break; }, Some(new_symbol) => { @@ -338,6 +359,14 @@ impl SymbolTable { match &new_symbol.symbol { Symbol::Namespace(new_root_id) => { if root_module_id != within_module_id { + // This new symbol is imported by a foreign + // module, so this is an error + debug_assert!(symbol.is_some()); + debug_assert!(symbol.unwrap().is_namespace()); + debug_assert!(iter.num_returned() > 1); + symbol = None; + break; + } within_module_id = *new_root_id; symbol = Some(new_symbol); }, @@ -347,13 +376,11 @@ impl SymbolTable { // module. if root_module_id != within_module_id && within_module_id != *definition_root_id { // This is an imported definition within the module - // TODO: Maybe factor out? Dunno... 
+ // So this is an error debug_assert!(symbol.is_some()); debug_assert!(symbol.unwrap().is_namespace()); debug_assert!(iter.num_returned() > 1); - let to_skip = iter.num_returned() - 1; - iter = identifier.iter(); - for _ in 0..to_skip { iter.next(); } + symbol = None; break; } symbol = Some(new_symbol); @@ -362,11 +389,17 @@ impl SymbolTable { } } } + + if poly_args.is_some() { + // Polymorphic argument specification should also be a fully + // resolved result. + break; + } } match symbol { - None => Ok(None), - Some(symbol) => Ok(Some((symbol, iter))) + None => (None, iter), + Some(symbol) => (Some(symbol), iter) } } @@ -377,12 +410,8 @@ impl SymbolTable { // Note: I would love to return a reference to the value, but Rust is // preventing me from doing so... That, or I'm not smart enough... fn add_namespace_symbol( - &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId + &mut self, origin_position: InputPosition, key: SymbolKey, target_module_id: RootId ) -> Result<(), InputPosition> { - let key = SymbolKey{ - module_id: origin_module_id, - symbol_name: symbol_name.clone() - }; match self.symbol_lookup.entry(key) { Entry::Occupied(o) => Err(o.get().position), Entry::Vacant(v) => { @@ -400,13 +429,9 @@ impl SymbolTable { /// together with the previous definition's source position (in the origin /// module's source file). 
fn add_definition_symbol( - &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, + &mut self, origin_position: InputPosition, key: SymbolKey, target_module_id: RootId, target_definition_id: DefinitionId, ) -> Result<(), InputPosition> { - let key = SymbolKey{ - module_id: origin_module_id, - symbol_name: symbol_name.clone() - }; match self.symbol_lookup.entry(key) { Entry::Occupied(o) => Err(o.get().position), Entry::Vacant(v) => { diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index c0c177905a1b940b1a16a69b8e5651f6482523d9..49841495a3f33c2e8fffaff9246eb741fe827e60 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -857,14 +857,14 @@ impl TypeTable { // polymorphic arguments. If so then we can halt the // execution for (poly_arg_idx, poly_arg) in poly_vars.iter().enumerate() { - if *poly_arg == symbolic.identifier { + if symbolic.identifier == *poly_arg { set_resolve_result(ResolveResult::PolyArg(poly_arg_idx)); continue 'resolve_loop; } } // Lookup the definition in the symbol table - let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); + let (symbol, mut ident_iter) = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); if symbol.is_none() { return Err(ParseError2::new_error( &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, @@ -872,21 +872,35 @@ impl TypeTable { )) } - let (symbol_value, mut ident_iter) = symbol.unwrap(); + let symbol_value = symbol.unwrap(); + let module_source = &ctx.modules[root_id.index as usize].source; + match symbol_value.symbol { Symbol::Namespace(_) => { // Reference to a namespace instead of a type + let last_ident = ident_iter.prev(); return if ident_iter.num_remaining() == 0 { + // Could also have polymorphic args, but we + // don't care, just throw this error: Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, 
symbolic.identifier.position, + module_source, symbolic.identifier.position, "Expected a type, got a module name" )) - } else { - let next_identifier = ident_iter.next().unwrap(); + } else if last_ident.is_some() && last_ident.map(|(_, poly_args)| poly_args.is_some()).unwrap() { + // Halted at a namespace because we encountered + // polymorphic arguments Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - &format!("Could not find symbol '{}' with this module", String::from_utf8_lossy(next_identifier)) + module_source, symbolic.identifier.position, + "Illegal specification of polymorphic arguments to a module name" )) + } else { + // Impossible (with the current implementation + // of the symbol table) + unreachable!( + "Got namespace symbol with {} returned symbols from {}", + ident_iter.num_returned(), + &String::from_utf8_lossy(&symbolic.identifier.value) + ); } }, Symbol::Definition((root_id, definition_id)) => { @@ -896,7 +910,7 @@ impl TypeTable { // we found. 
Return the appropriate message return if definition.is_struct() || definition.is_enum() { Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + module_source, symbolic.identifier.position, &format!( "Unknown type '{}', did you mean to use '{}'?", String::from_utf8_lossy(&symbolic.identifier.value), @@ -905,8 +919,8 @@ impl TypeTable { )) } else { Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Unknown type" + module_source, symbolic.identifier.position, + "Unknown datatype" )) } } @@ -914,7 +928,7 @@ impl TypeTable { // Found a match, make sure it is a datatype if !(definition.is_struct() || definition.is_enum()) { return Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + module_source, symbolic.identifier.position, "Embedded types must be datatypes (structs or enums)" )) } @@ -934,8 +948,11 @@ impl TypeTable { // Note: because we're resolving parser types, not // embedded types, we're parsing a tree, so we can't // get stuck in a cyclic loop. 
- for poly_arg_type_id in &symbolic.poly_args { - self.parser_type_iter.push_back(*poly_arg_type_id); + let last_ident = ident_iter.prev(); + if let Some((_, Some(poly_args))) = last_ident { + for poly_arg_type_id in poly_args { + self.parser_type_iter.push_back(*poly_arg_type_id); + } } } } @@ -1001,12 +1018,12 @@ impl TypeTable { }, PTV::Symbolic(symbolic) => { for (poly_arg_idx, poly_arg) in poly_args.iter_mut().enumerate() { - if poly_arg.identifier == symbolic.identifier { + if symbolic.identifier == poly_arg.identifier { poly_arg.is_in_use = true; // TODO: If we allow higher-kinded types in the future, // then we can't continue here, but must resolve the // polyargs as well - debug_assert!(symbolic.poly_args.is_empty(), "got polymorphic arguments to a polymorphic variable"); + debug_assert!(!symbolic.identifier.has_poly_args(), "got polymorphic arguments to a polymorphic variable"); debug_assert!(symbolic.variant.is_none(), "symbolic parser type's variant already resolved"); symbolic.variant = Some(SymbolicParserTypeVariant::PolyArg(type_definition_id, poly_arg_idx)); continue 'type_loop; @@ -1014,20 +1031,31 @@ impl TypeTable { } // Must match a definition - let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); + let (symbol, _) = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); debug_assert!(symbol.is_some(), "could not resolve symbolic parser type when determining poly args"); let (symbol, ident_iter) = symbol.unwrap(); debug_assert_eq!(ident_iter.num_remaining(), 0, "no exact symbol match when determining poly args"); let (_root_id, definition_id) = symbol.as_definition().unwrap(); - // Must be a struct, enum, or union + // Must be a struct, enum, or union, we checked this let defined_type = self.lookup.get(&definition_id).unwrap(); if cfg!(debug_assertions) { + // Make sure type class is correct let type_class = defined_type.definition.type_class(); debug_assert!( type_class == TypeClass::Struct || 
type_class == TypeClass::Enum || type_class == TypeClass::Union, "embedded type's class is not struct, enum or union" ); + // Make sure polymorphic arguments occurred at the end + let num_poly = symbolic.identifier.iter() + .map(|(_, v)| v) + .filter(|v| v.is_some()) + .count(); + debug_assert!(num_poly <= 1, "more than one section with polymorphic arguments"); + if num_poly == 1 { + let (_, poly_args) = symbolic.identifier.iter().last().unwrap(); + debug_assert!(poly_args.is_some(), "got poly args, but not at end of identifier"); + } } if symbolic.poly_args.len() != defined_type.poly_args.len() { diff --git a/src/protocol/tests/parser_imports.rs b/src/protocol/tests/parser_imports.rs index 27430f1dcbddde5ff7cc25d5f9682eb4e6d4791e..94faacc68bcf7b037fc2a7f884c56664994b31bf 100644 --- a/src/protocol/tests/parser_imports.rs +++ b/src/protocol/tests/parser_imports.rs @@ -145,4 +145,9 @@ fn test_multi_symbol_import() { // ") // .compile() // .expect_ok(); -} \ No newline at end of file +} + +// TODO: Test incorrect imports: +// 1. importing a module +// 2. import something a module imports +// 3. import something that doesn't exist in a module \ No newline at end of file