Files @ 379cffa23df8
Branch filter:

Location: CSY/reowolf/src/protocol/parser/symbol_table.rs

379cffa23df8 20.1 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
mh
WIP on type table
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
// TODO: Maybe allow namespaced-aliased imports. It is currently not possible
//  to express the following:
//      import Module.Submodule as SubMod
//      import SubMod::{Symbol}
//  And it is especially not possible to express the following:
//      import SubMod::{Symbol}
//      import Module.Submodule as SubMod
use crate::protocol::ast::*;
use crate::protocol::inputsource::*;

use std::collections::{HashMap, hash_map::Entry};
use crate::protocol::parser::LexedModule;

#[derive(PartialEq, Eq, Hash)]
struct SymbolKey {
    module_id: RootId,
    symbol_name: Vec<u8>,
}

pub(crate) enum Symbol {
    Namespace(RootId),
    Definition((RootId, DefinitionId)),
}

pub(crate) struct SymbolValue {
    // Position refers to the origin of the symbol definition (i.e. the module's
    // RootId that is in the key being used to lookup this value)
    pub(crate) position: InputPosition,
    pub(crate) symbol: Symbol,
}

impl SymbolValue {
    pub(crate) fn is_namespace(&self) -> bool {
        match &self.symbol {
            Symbol::Namespace(_) => true,
            _ => false
        }
    }
    pub(crate) fn as_namespace(&self) -> Option<RootId> {
        match &self.symbol {
            Symbol::Namespace(root_id) => Some(*root_id),
            _ => None,
        }
    }

    pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> {
        match &self.symbol {
            Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)),
            _ => None,
        }
    }
}
/// `SymbolTable` is responsible for two parts of the parsing process: firstly
/// it ensures that there are no clashing symbol definitions within each file,
/// and secondly it will resolve all symbols within a module to their
/// appropriate definitions (in case of enums, functions, etc.) and namespaces
/// (currently only external modules can act as namespaces). If a symbol clashes
/// or if a symbol cannot be resolved this will be an error.
///
/// Within the compilation process the symbol table is responsible for resolving
/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate
/// definition (i.e. not namespaces; as the language has no way to use
/// namespaces except for using them in namespaced identifiers).
pub(crate) struct SymbolTable {
    // Lookup from module name (not any aliases) to the root id
    module_lookup: HashMap<Vec<u8>, RootId>,
    // Lookup from within a module, to a particular imported (potentially
    // aliased) or defined symbol. Basically speaking: if the source code of a
    // module contains correctly imported/defined symbols, then this lookup
    // will always return the corresponding definition
    symbol_lookup: HashMap<SymbolKey, SymbolValue>,
}

impl SymbolTable {
    pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result<Self, ParseError2> {
        // Sanity check
        if cfg!(debug_assertions) {
            for (index, module) in modules.iter().enumerate() {
                debug_assert_eq!(
                    index, module.root_id.0.index as usize,
                    "module RootId does not correspond to LexedModule index"
                )
            }
        }

        // Preparation: create a lookup from module name to root id. This does
        // not take aliasing into account.
        let mut module_lookup = HashMap::with_capacity(modules.len());
        for module in modules {
            // TODO: Maybe put duplicate module name checking here?
            // TODO: @string
            module_lookup.insert(module.module_name.clone(), module.root_id);
        }

        // Preparation: determine total number of imports we will be inserting
        // into the lookup table. We could just iterate over the arena, but then
        // we don't know the source file the import belongs to.
        let mut lookup_reserve_size = 0;
        for module in modules {
            let module_root = &heap[module.root_id];
            for import_id in &module_root.imports {
                match &heap[*import_id] {
                    Import::Module(_) => lookup_reserve_size += 1,
                    Import::Symbols(import) => {
                        if import.symbols.is_empty() {
                            // Add all symbols from the other module
                            match module_lookup.get(&import.module_name) {
                                Some(target_module_id) => {
                                    lookup_reserve_size += heap[*target_module_id].definitions.len()
                                },
                                None => {
                                    return Err(
                                        ParseError2::new_error(&module.source, import.position, "Cannot resolve module")
                                    );
                                }
                            }
                        }
                    }
                }
            }
        }

        let mut table = Self{
            module_lookup,
            symbol_lookup: HashMap::with_capacity(lookup_reserve_size)
        };

        // First pass: we go through all of the modules and add lookups to
        // symbols that are defined within that module. Cross-module imports are
        // not yet resolved
        for module in modules {
            let root = &heap[module.root_id];
            for definition_id in &root.definitions {
                let definition = &heap[*definition_id];
                let identifier = definition.identifier();
                if let Err(previous_position) = table.add_definition_symbol(
                    module.root_id, identifier.position, &identifier.value,
                    module.root_id, *definition_id
                ) {
                    return Err(
                        ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined")
                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                    )
                }
            }
        }

        // Second pass: now that we can find symbols in modules, we can resolve
        // all imports (if they're correct, that is)
        for module in modules {
            let root = &heap[module.root_id];
            for import_id in &root.imports {
                let import = &heap[*import_id];
                match import {
                    Import::Module(import) => {
                        // Find the module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Add the target module under its alias
                        if let Err(previous_position) = table.add_namespace_symbol(
                            module.root_id, import.position,
                            &import.alias, target_root_id
                        ) {
                            return Err(
                                ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined")
                                    .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                            );
                        }
                    },
                    Import::Symbols(import) => {
                        // Find the target module using its name
                        let target_root_id = table.resolve_module(&import.module_name);
                        if target_root_id.is_none() {
                            return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports"));
                        }
                        let target_root_id = target_root_id.unwrap();
                        if target_root_id == module.root_id {
                            return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self"));
                        }

                        // Determine which symbols to import
                        if import.symbols.is_empty() {
                            // Import of all symbols, not using any aliases
                            for definition_id in &heap[target_root_id].definitions {
                                let definition = &heap[*definition_id];
                                let identifier = definition.identifier();
                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, import.position, &identifier.value,
                                    target_root_id, *definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(
                                            &module.source, import.position,
                                            &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value))
                                        )
                                        .with_postfixed_info(
                                            &modules[target_root_id.0.index as usize].source,
                                            definition.position(),
                                            "The imported symbol is defined here"
                                        )
                                        .with_postfixed_info(
                                            &module.source, previous_position, "And is previously defined here"
                                        )
                                    )
                                }
                            }
                        } else {
                            // Import of specific symbols, optionally using aliases
                            for symbol in &import.symbols {
                                // Because we have already added per-module definitions, we can use
                                // the table to lookup this particular symbol. Note: within a single
                                // module a namespace-import and a symbol-import may not collide.
                                // Hence per-module symbols are unique.
                                // However: if we import a symbol from another module, we don't want
                                // to "import a module's imported symbol". And so if we do find
                                // a symbol match, we need to make sure it is a definition from
                                // within that module by checking `source_root_id == target_root_id`
                                let target_symbol = table.resolve_symbol(target_root_id, &symbol.name);
                                let symbol_definition_id = match target_symbol {
                                    Some(target_symbol) => {
                                        match target_symbol.symbol {
                                            Symbol::Definition((symbol_root_id, symbol_definition_id)) => {
                                                if symbol_root_id == target_root_id {
                                                    Some(symbol_definition_id)
                                                } else {
                                                    // This is imported within the target module, and not
                                                    // defined within the target module
                                                    None
                                                }
                                            },
                                            Symbol::Namespace(_) => {
                                                // We don't import a module's "module import"
                                                None
                                            }
                                        }
                                    },
                                    None => None
                                };

                                if symbol_definition_id.is_none() {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol")
                                    )
                                }
                                let symbol_definition_id = symbol_definition_id.unwrap();

                                if let Err(previous_position) = table.add_definition_symbol(
                                    module.root_id, symbol.position, &symbol.alias,
                                    target_root_id, symbol_definition_id
                                ) {
                                    return Err(
                                        ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined")
                                            .with_postfixed_info(&module.source, previous_position, "Previous definition was here")
                                    )
                                }
                            }
                        }
                    }
                }
            }
        }

        debug_assert_eq!(
            table.symbol_lookup.len(), lookup_reserve_size,
            "miscalculated reserved size for symbol lookup table"
        );
        Ok(table)
    }

    /// Resolves a module by its defined name
    pub(crate) fn resolve_module(&self, identifier: &Vec<u8>) -> Option<RootId> {
        self.module_lookup.get(identifier).map(|v| *v)
    }

    /// Resolves a symbol within a particular module, indicated by its RootId,
    /// with a single non-namespaced identifier
    pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec<u8>) -> Option<&SymbolValue> {
        self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() })
    }

    /// Resolves a namespaced symbol. It will try to go as far as possible in
    /// actually finding a definition or a namespace. So a namespace might be
    /// resolved, after it which it finds an actual definition. It may be that
    /// the namespaced identifier has more elements that should be checked
    /// (i.e. an enum variant, or simply an erroneous instance of too many
    /// chained identifiers). This function will return None if nothing could be
    /// resolved at all.
    pub(crate) fn resolve_namespaced_symbol<'t, 'i>(
        &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier
    ) -> Option<(&SymbolValue, NamespacedIdentifierIter<'i>)> {
        let mut iter = identifier.iter();
        let mut symbol: Option<&SymbolValue> = None;
        let mut within_module_id = root_module_id;
        while let Some(partial) = iter.next() {
            // Lookup the symbol within the currently iterated upon module
            let lookup_key = SymbolKey{ module_id: within_module_id, symbol_name: Vec::from(partial) };
            let new_symbol = self.symbol_lookup.get(&lookup_key);
            
            match new_symbol {
                None => {
                    // Can't find anything
                    break; 
                },
                Some(new_symbol) => {
                    // Found something, but if we already moved to another
                    // module then we don't want to keep jumping across modules,
                    // we're only interested in symbols defined within that
                    // module.
                    match &new_symbol.symbol {
                        Symbol::Namespace(new_root_id) => {
                            if root_module_id != within_module_id {
                                // Don't jump from module to module, keep the
                                // old symbol (which must be a Namespace) and
                                // break
                                debug_assert!(symbol.is_some());
                                debug_assert!(symbol.unwrap().is_namespace());
                                debug_assert!(iter.num_returned() > 1);

                                // For handling this error, we need to revert
                                // the iterator by one
                                let to_skip = iter.num_returned() - 1;
                                iter = identifier.iter();
                                for _ in 0..to_skip { iter.next(); }
                                break;
                            }

                            within_module_id = *new_root_id;
                            symbol = Some(new_symbol);
                        },
                        Symbol::Definition((definition_root_id, _)) => {
                            // Found a definition, but if we already jumped
                            // modules, then this must be defined within that
                            // module.
                            if root_module_id != within_module_id && within_module_id != *definition_root_id {
                                // This is an imported definition within the module
                                // TODO: Maybe factor out? Dunno...
                                debug_assert!(symbol.is_some());
                                debug_assert!(symbol.unwrap().is_namespace());
                                debug_assert!(iter.num_returned() > 1);
                                let to_skip = iter.num_returned() - 1;
                                iter = identifier.iter();
                                for _ in 0..to_skip { iter.next(); }
                                break;
                            }
                            symbol = Some(new_symbol);
                            break;
                        }
                    }
                }
            }
        }

        match symbol {
            None => None,
            Some(symbol) => Some((symbol, iter))
        }
    }

    /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    // Note: I would love to return a reference to the value, but Rust is
    // preventing me from doing so... That, or I'm not smart enough...
    fn add_namespace_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>, target_module_id: RootId
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue{
                    position: origin_position,
                    symbol: Symbol::Namespace(target_module_id)
                });
                Ok(())
            }
        }
    }

    /// Attempts to add a definition symbol. Returns `Ok` if the symbol was
    /// inserted. If the symbol already exists then `Err` will be returned
    /// together with the previous definition's source position (in the origin
    /// module's source file).
    fn add_definition_symbol(
        &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec<u8>,
        target_module_id: RootId, target_definition_id: DefinitionId,
    ) -> Result<(), InputPosition> {
        let key = SymbolKey{
            module_id: origin_module_id,
            symbol_name: symbol_name.clone()
        };
        match self.symbol_lookup.entry(key) {
            Entry::Occupied(o) => Err(o.get().position),
            Entry::Vacant(v) => {
                v.insert(SymbolValue {
                    position: origin_position,
                    symbol: Symbol::Definition((target_module_id, target_definition_id))
                });
                Ok(())
            }
        }
    }
}