Changeset - 2d6659928698
[Not reviewed]
0 5 0
MH - 4 years ago 2021-04-01 10:25:35
henger@cwi.nl
more refactoring of namespaced identifier
5 files changed with 131 insertions and 60 deletions:
0 comments (0 inline, 0 general)
src/protocol/ast.rs
Show inline comments
 
@@ -587,7 +587,7 @@ pub struct ImportModule {
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub module_name: Vec<u8>,
 
    pub alias: Vec<u8>,
 
    pub alias: Identifier,
 
    // Phase 2: module resolving
 
    pub module_id: Option<RootId>,
 
}
 
@@ -596,8 +596,8 @@ pub struct ImportModule {
 
pub struct AliasedSymbol {
 
    // Phase 1: parser
 
    pub position: InputPosition,
 
    pub name: Vec<u8>,
 
    pub alias: Vec<u8>,
 
    pub name: Identifier,
 
    pub alias: Identifier,
 
    // Phase 2: symbol resolving
 
    pub definition_id: Option<DefinitionId>,
 
}
 
@@ -644,14 +644,14 @@ pub enum NamespacedIdentifierPart {
 
}
 

	
 
impl NamespacedIdentifierPart {
 
    fn is_identifier(&self) -> bool {
 
    pub(crate) fn is_identifier(&self) -> bool {
 
        match self {
 
            NamespacedIdentifierPart::Identifier{..} => true,
 
            NamespacedIdentifierPart::PolyArgs{..} => false,
 
        }
 
    }
 

	
 
    fn as_identifier(&self) -> (u16, u16) {
 
    pub(crate) fn as_identifier(&self) -> (u16, u16) {
 
        match self {
 
            NamespacedIdentifierPart::Identifier{start, end} => (*start, *end),
 
            NamespacedIdentifierPart::PolyArgs{..} => {
 
@@ -660,7 +660,7 @@ impl NamespacedIdentifierPart {
 
        }
 
    }
 

	
 
    fn as_poly_args(&self) -> (u16, u16) {
 
    pub(crate) fn as_poly_args(&self) -> (u16, u16) {
 
        match self {
 
            NamespacedIdentifierPart::PolyArgs{start, end} => (*start, *end),
 
            NamespacedIdentifierPart::Identifier{..} => {
 
@@ -670,7 +670,11 @@ impl NamespacedIdentifierPart {
 
    }
 
}
 

	
 
/// An identifier with optional namespaces and polymorphic variables
 
/// An identifier with optional namespaces and polymorphic variables. Note that 
 
/// we allow each identifier to be followed by polymorphic arguments during the 
 
/// parsing phase (e.g. Foo<A,B>::Bar<C,D>::Qux). But in our current language 
 
/// implementation we can only have valid namespaced identifier that contain one
 
/// set of polymorphic arguments at the appropriate position.
 
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 
pub struct NamespacedIdentifier2 {
 
    pub position: InputPosition,
 
@@ -686,6 +690,10 @@ impl NamespacedIdentifier2 {
 
            element_idx: 0
 
        }
 
    }
 

	
 
    pub fn has_poly_args(&self) -> bool {
 
        return !self.poly_args.is_empty();
 
    }
 
}
 

	
 
impl PartialEq for NamespacedIdentifier2 {
src/protocol/lexer.rs
Show inline comments
 
@@ -2394,6 +2394,7 @@ impl Lexer<'_> {
 
        self.consume_keyword(b"import")?;
 
        self.consume_whitespace(true)?;
 
        let mut value = Vec::new();
 
        let mut last_ident_pos = self.source.pos();
 
        let mut ident = self.consume_ident()?;
 
        value.append(&mut ident);
 
        let mut last_ident_start = 0;
 
@@ -2401,6 +2402,7 @@ impl Lexer<'_> {
 
        while self.has_string(b".") {
 
            self.consume_string(b".")?;
 
            value.push(b'.');
 
            last_ident_pos = self.source.pos();
 
            ident = self.consume_ident()?;
 
            last_ident_start = value.len();
 
            value.append(&mut ident);
 
@@ -2413,7 +2415,7 @@ impl Lexer<'_> {
 
        let import = if self.has_string(b"as") {
 
            self.consume_string(b"as")?;
 
            self.consume_whitespace(true)?;
 
            let alias = self.consume_ident()?;
 
            let alias = self.consume_identifier()?;
 

	
 
            h.alloc_import(|this| Import::Module(ImportModule{
 
                this,
 
@@ -2433,7 +2435,7 @@ impl Lexer<'_> {
 
                    |lexer, _heap| {
 
                        // Symbol name
 
                        let position = lexer.source.pos();
 
                        let name = lexer.consume_ident()?;
 
                        let name = lexer.consume_identifier()?;
 
                        lexer.consume_whitespace(false)?;
 

	
 
                        // Symbol alias
 
@@ -2441,7 +2443,7 @@ impl Lexer<'_> {
 
                            // With alias
 
                            lexer.consume_string(b"as")?;
 
                            lexer.consume_whitespace(true)?;
 
                            let alias = lexer.consume_ident()?;
 
                            let alias = lexer.consume_identifier()?;
 

	
 
                            Ok(AliasedSymbol{
 
                                position,
 
@@ -2482,12 +2484,12 @@ impl Lexer<'_> {
 
                }))
 
            } else if self.has_identifier() {
 
                let position = self.source.pos();
 
                let name = self.consume_ident()?;
 
                let name = self.consume_identifier()?;
 
                self.consume_whitespace(false)?;
 
                let alias = if self.has_string(b"as") {
 
                    self.consume_string(b"as")?;
 
                    self.consume_whitespace(true)?;
 
                    self.consume_ident()?
 
                    self.consume_identifier()?
 
                } else {
 
                    name.clone()
 
                };
 
@@ -2509,12 +2511,15 @@ impl Lexer<'_> {
 
            }
 
        } else {
 
            // No explicit alias or subimports, so implicit alias
 
            let alias = Vec::from(&value[last_ident_start..]);
 
            let alias_value = Vec::from(&value[last_ident_start..]);
 
            h.alloc_import(|this| Import::Module(ImportModule{
 
                this,
 
                position,
 
                module_name: value,
 
                alias,
 
                alias: Identifier{
 
                    position: last_ident_pos,
 
                    value: Vec::from(alias_value),
 
                },
 
                module_id: None,
 
            }))
 
        };
src/protocol/parser/symbol_table.rs
Show inline comments
 
@@ -17,6 +17,29 @@ struct SymbolKey {
 
    symbol_name: Vec<u8>,
 
}
 

	
 
impl SymbolKey {
 
    fn from_identifier(module_id: RootId, symbol: &Identifier) -> Self {
 
        Self{ module_id, symbol_name: symbol.value.clone() }
 
    }
 

	
 
    fn from_namespaced_identifier(module_id: RootId, symbol: &NamespacedIdentifier2) -> Self {
 
        // Strip polymorpic arguments from the identifier
 
        let symbol_name = Vec::with_capacity(symbol.value.len());
 
        debug_assert!(symbol.parts.len() > 0 && symbol.parts[0].is_identifier());
 

	
 
        let mut iter = symbol.iter();
 
        let (first_ident, _) = iter.next().unwrap();
 
        symbol_name.extend(first_ident);
 

	
 
        for (ident, _) in iter {
 
            symbol_name.push(b':');
 
            symbol_name.extend(ident);
 
        }
 

	
 
        Self{ module_id, symbol_name }
 
    }
 
}
 

	
 
pub(crate) enum Symbol {
 
    Namespace(RootId),
 
    Definition((RootId, DefinitionId)),
 
@@ -144,7 +167,7 @@ impl SymbolTable {
 
                let definition = &heap[*definition_id];
 
                let identifier = definition.identifier();
 
                if let Err(previous_position) = self.add_definition_symbol(
 
                    module.root_id, identifier.position, &identifier.value,
 
                    identifier.position, SymbolKey::from_identifier(module.root_id, &identifier),
 
                    module.root_id, *definition_id
 
                ) {
 
                    return Err(
 
@@ -175,8 +198,8 @@ impl SymbolTable {
 

	
 
                        // Add the target module under its alias
 
                        if let Err(previous_position) = self.add_namespace_symbol(
 
                            module.root_id, import.position,
 
                            &import.alias, target_root_id
 
                            import.position, SymbolKey::from_identifier(module.root_id, &import.alias),
 
                            target_root_id
 
                        ) {
 
                            return Err(
 
                                ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined")
 
@@ -202,7 +225,7 @@ impl SymbolTable {
 
                                let definition = &heap[*definition_id];
 
                                let identifier = definition.identifier();
 
                                if let Err(previous_position) = self.add_definition_symbol(
 
                                    module.root_id, import.position, &identifier.value,
 
                                    import.position, SymbolKey::from_identifier(module.root_id, identifier),
 
                                    target_root_id, *definition_id
 
                                ) {
 
                                    return Err(
 
@@ -232,7 +255,8 @@ impl SymbolTable {
 
                                // to "import a module's imported symbol". And so if we do find
 
                                // a symbol match, we need to make sure it is a definition from
 
                                // within that module by checking `source_root_id == target_root_id`
 
                                let target_symbol = self.resolve_symbol(target_root_id, &symbol.name);
 
                                let key = SymbolKey::from_identifier(target_root_id, &symbol.name);
 
                                let target_symbol = self.symbol_lookup.get(&key);
 
                                let symbol_definition_id = match target_symbol {
 
                                    Some(target_symbol) => {
 
                                        match target_symbol.symbol {
 
@@ -262,7 +286,7 @@ impl SymbolTable {
 
                                let symbol_definition_id = symbol_definition_id.unwrap();
 

	
 
                                if let Err(previous_position) = self.add_definition_symbol(
 
                                    module.root_id, symbol.position, &symbol.alias,
 
                                    symbol.position, SymbolKey::from_identifier(module.root_id, &symbol.alias),
 
                                    target_root_id, symbol_definition_id
 
                                ) {
 
                                    return Err(
 
@@ -302,20 +326,16 @@ impl SymbolTable {
 
        self.module_lookup.get(identifier).map(|v| *v)
 
    }
 

	
 
    /// Resolves a symbol within a particular module, indicated by its RootId,
 
    /// with a single non-namespaced identifier
 
    pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec<u8>) -> Option<&SymbolValue> {
 
        self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() })
 
    }
 

	
 
    /// Resolves a namespaced symbol. This method will go as far as possible in
 
    /// going to the right symbol. It will halt the search when:
 
    /// 1. Polymorphic arguments are encountered on the identifier.
 
    /// 2. A non-namespace symbol is encountered.
 
    /// 3. A part of the identifier couldn't be resolved to anything
 
    /// The returned iterator will always point to the next symbol (even if 
 
    /// nothing was found)
 
    pub(crate) fn resolve_namespaced_symbol<'t, 'i>(
 
        &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier2
 
    ) -> (Option<&'t Symbol>, &'i NamespacedIdentifier2Iter) {
 
    ) -> (Option<&'t SymbolValue>, NamespacedIdentifier2Iter<'i>) {
 
        let mut iter = identifier.iter();
 
        let mut symbol: Option<&SymbolValue> = None;
 
        let mut within_module_id = root_module_id;
 
@@ -328,6 +348,7 @@ impl SymbolTable {
 
            match new_symbol {
 
                None => {
 
                    // Can't find anything
 
                    symbol = None;
 
                    break;
 
                },
 
                Some(new_symbol) => {
 
@@ -338,6 +359,14 @@ impl SymbolTable {
 
                    match &new_symbol.symbol {
 
                        Symbol::Namespace(new_root_id) => {
 
                            if root_module_id != within_module_id {
 
                                // This new symbol is imported by a foreign
 
                                // module, so this is an error
 
                                debug_assert!(symbol.is_some());
 
                                debug_assert!(symbol.unwrap().is_namespace());
 
                                debug_assert!(iter.num_returned() > 1);
 
                                symbol = None;
 
                                break;
 
                            }
 
                            within_module_id = *new_root_id;
 
                            symbol = Some(new_symbol);
 
                        },
 
@@ -347,13 +376,11 @@ impl SymbolTable {
 
                            // module.
 
                            if root_module_id != within_module_id && within_module_id != *definition_root_id {
 
                                // This is an imported definition within the module
 
                                // TODO: Maybe factor out? Dunno...
 
                                // So keep the old 
 
                                debug_assert!(symbol.is_some());
 
                                debug_assert!(symbol.unwrap().is_namespace());
 
                                debug_assert!(iter.num_returned() > 1);
 
                                let to_skip = iter.num_returned() - 1;
 
                                iter = identifier.iter();
 
                                for _ in 0..to_skip { iter.next(); }
 
                                symbol = None;
 
                                break;
 
                            }
 
                            symbol = Some(new_symbol);
 
@@ -362,11 +389,17 @@ impl SymbolTable {
 
                    }
 
                }
 
            }
 

	
 
            if poly_args.is_some() {
 
                // Polymorphic argument specification should also be a fully 
 
                // resolved result.
 
                break;
 
            }
 
        }
 

	
 
        match symbol {
 
            None => Ok(None),
 
            Some(symbol) => Ok(Some((symbol, iter)))
 
            None => (None, iter),
 
            Some(symbol) => (Some(symbol), iter)
 
        }
 
    }
 

	
 
@@ -377,12 +410,8 @@ impl SymbolTable {
 
    // Note: I would love to return a reference to the value, but Rust is
 
    // preventing me from doing so... That, or I'm not smart enough...
 
    fn add_namespace_symbol(
 
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>, target_module_id: RootId
 
        &mut self, origin_position: InputPosition, key: SymbolKey, target_module_id: RootId
 
    ) -> Result<(), InputPosition> {
 
        let key = SymbolKey{
 
            module_id: origin_module_id,
 
            symbol_name: symbol_name.clone()
 
        };
 
        match self.symbol_lookup.entry(key) {
 
            Entry::Occupied(o) => Err(o.get().position),
 
            Entry::Vacant(v) => {
 
@@ -400,13 +429,9 @@ impl SymbolTable {
 
    /// together with the previous definition's source position (in the origin
 
    /// module's source file).
 
    fn add_definition_symbol(
 
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>,
 
        &mut self, origin_position: InputPosition, key: SymbolKey,
 
        target_module_id: RootId, target_definition_id: DefinitionId,
 
    ) -> Result<(), InputPosition> {
 
        let key = SymbolKey{
 
            module_id: origin_module_id,
 
            symbol_name: symbol_name.clone()
 
        };
 
        match self.symbol_lookup.entry(key) {
 
            Entry::Occupied(o) => Err(o.get().position),
 
            Entry::Vacant(v) => {
src/protocol/parser/type_table.rs
Show inline comments
 
@@ -857,14 +857,14 @@ impl TypeTable {
 
                    // polymorphic arguments. If so then we can halt the
 
                    // execution
 
                    for (poly_arg_idx, poly_arg) in poly_vars.iter().enumerate() {
 
                        if *poly_arg == symbolic.identifier {
 
                        if symbolic.identifier == *poly_arg {
 
                            set_resolve_result(ResolveResult::PolyArg(poly_arg_idx));
 
                            continue 'resolve_loop;
 
                        }
 
                    }
 

	
 
                    // Lookup the definition in the symbol table
 
                    let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier);
 
                    let (symbol, mut ident_iter) = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier);
 
                    if symbol.is_none() {
 
                        return Err(ParseError2::new_error(
 
                            &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
@@ -872,21 +872,35 @@ impl TypeTable {
 
                        ))
 
                    }
 

	
 
                    let (symbol_value, mut ident_iter) = symbol.unwrap();
 
                    let symbol_value = symbol.unwrap();
 
                    let module_source = &ctx.modules[root_id.index as usize].source;
 

	
 
                    match symbol_value.symbol {
 
                        Symbol::Namespace(_) => {
 
                            // Reference to a namespace instead of a type
 
                            let last_ident = ident_iter.prev();
 
                            return if ident_iter.num_remaining() == 0 {
 
                                // Could also have polymorphic args, but we 
 
                                // don't care, just throw this error: 
 
                                Err(ParseError2::new_error(
 
                                    &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
                                    module_source, symbolic.identifier.position,
 
                                    "Expected a type, got a module name"
 
                                ))
 
                            } else {
 
                                let next_identifier = ident_iter.next().unwrap();
 
                            } else if last_ident.is_some() && last_ident.map(|(_, poly_args)| poly_args.is_some()).unwrap() {
 
                                // Halted at a namespaced because we encountered
 
                                // polymorphic arguments
 
                                Err(ParseError2::new_error(
 
                                    &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
                                    &format!("Could not find symbol '{}' with this module", String::from_utf8_lossy(next_identifier))
 
                                    module_source, symbolic.identifier.position,
 
                                    "Illegal specification of polymorphic arguments to a module name"
 
                                ))
 
                            } else {
 
                                // Impossible (with the current implementation 
 
                                // of the symbol table)
 
                                unreachable!(
 
                                    "Got namespace symbol with {} returned symbols from {}",
 
                                    ident_iter.num_returned(),
 
                                    &String::from_utf8_lossy(&symbolic.identifier.value)
 
                                );
 
                            }
 
                        },
 
                        Symbol::Definition((root_id, definition_id)) => {
 
@@ -896,7 +910,7 @@ impl TypeTable {
 
                                // we found. Return the appropriate message
 
                                return if definition.is_struct() || definition.is_enum() {
 
                                    Err(ParseError2::new_error(
 
                                        &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
                                        module_source, symbolic.identifier.position,
 
                                        &format!(
 
                                            "Unknown type '{}', did you mean to use '{}'?",
 
                                            String::from_utf8_lossy(&symbolic.identifier.value),
 
@@ -905,8 +919,8 @@ impl TypeTable {
 
                                    ))
 
                                } else {
 
                                    Err(ParseError2::new_error(
 
                                        &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
                                        "Unknown type"
 
                                        module_source, symbolic.identifier.position,
 
                                        "Unknown datatype"
 
                                    ))
 
                                }
 
                            }
 
@@ -914,7 +928,7 @@ impl TypeTable {
 
                            // Found a match, make sure it is a datatype
 
                            if !(definition.is_struct() || definition.is_enum()) {
 
                                return Err(ParseError2::new_error(
 
                                    &ctx.modules[root_id.index as usize].source, symbolic.identifier.position,
 
                                    module_source, symbolic.identifier.position,
 
                                    "Embedded types must be datatypes (structs or enums)"
 
                                ))
 
                            }
 
@@ -934,7 +948,9 @@ impl TypeTable {
 
                            // Note: because we're resolving parser types, not
 
                            // embedded types, we're parsing a tree, so we can't
 
                            // get stuck in a cyclic loop.
 
                            for poly_arg_type_id in &symbolic.poly_args {
 
                            let last_ident = ident_iter.prev();
 
                            if let Some((_, Some(poly_args))) = last_ident {
 
                                for poly_arg_type_id in poly_args {
 
                                    self.parser_type_iter.push_back(*poly_arg_type_id);
 
                                }
 
                            }
 
@@ -942,6 +958,7 @@ impl TypeTable {
 
                    }
 
                }
 
            }
 
        }
 

	
 
        // If here then all types in the embedded type's tree were resolved.
 
        debug_assert!(resolve_result.is_some(), "faulty logic in ParserType resolver");
 
@@ -1001,12 +1018,12 @@ impl TypeTable {
 
                },
 
                PTV::Symbolic(symbolic) => {
 
                    for (poly_arg_idx, poly_arg) in poly_args.iter_mut().enumerate() {
 
                        if poly_arg.identifier == symbolic.identifier {
 
                        if symbolic.identifier == poly_arg.identifier {
 
                            poly_arg.is_in_use = true;
 
                            // TODO: If we allow higher-kinded types in the future,
 
                            //  then we can't continue here, but must resolve the
 
                            //  polyargs as well
 
                            debug_assert!(symbolic.poly_args.is_empty(), "got polymorphic arguments to a polymorphic variable");
 
                            debug_assert!(!symbolic.identifier.has_poly_args(), "got polymorphic arguments to a polymorphic variable");
 
                            debug_assert!(symbolic.variant.is_none(), "symbolic parser type's variant already resolved");
 
                            symbolic.variant = Some(SymbolicParserTypeVariant::PolyArg(type_definition_id, poly_arg_idx));
 
                            continue 'type_loop;
 
@@ -1014,20 +1031,31 @@ impl TypeTable {
 
                    }
 

	
 
                    // Must match a definition
 
                    let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier);
 
                    let (symbol, _) = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier);
 
                    debug_assert!(symbol.is_some(), "could not resolve symbolic parser type when determining poly args");
 
                    let (symbol, ident_iter) = symbol.unwrap();
 
                    debug_assert_eq!(ident_iter.num_remaining(), 0, "no exact symbol match when determining poly args");
 
                    let (_root_id, definition_id) = symbol.as_definition().unwrap();
 
    
 
                    // Must be a struct, enum, or union
 
                    // Must be a struct, enum, or union, we checked this
 
                    let defined_type = self.lookup.get(&definition_id).unwrap();
 
                    if cfg!(debug_assertions) {
 
                        // Make sure type class is correct
 
                        let type_class = defined_type.definition.type_class();
 
                        debug_assert!(
 
                            type_class == TypeClass::Struct || type_class == TypeClass::Enum || type_class == TypeClass::Union,
 
                            "embedded type's class is not struct, enum or union"
 
                        );
 
                        // Make sure polymorphic arguments occurred at the end
 
                        let num_poly = symbolic.identifier.iter()
 
                            .map(|(_, v)| v)
 
                            .filter(|v| v.is_some())
 
                            .count();
 
                        debug_assert!(num_poly <= 1, "more than one section with polymorphic arguments");
 
                        if num_poly == 1 {
 
                            let (_, poly_args) = symbolic.identifier.iter().last().unwrap();
 
                            debug_assert!(poly_args.is_some(), "got poly args, but not at end of identifier");
 
                        }
 
                    }
 
    
 
                    if symbolic.poly_args.len() != defined_type.poly_args.len() {
src/protocol/tests/parser_imports.rs
Show inline comments
 
@@ -146,3 +146,8 @@ fn test_multi_symbol_import() {
 
    //     .compile()
 
    //     .expect_ok();
 
}
 

	
 
// TODO: Test incorrect imports:
 
//  1. importing a module
 
//  2. import something a module imports
 
//  3. import something that doesn't exist in a module
 
\ No newline at end of file
0 comments (0 inline, 0 general)