From 9227f244da7360dedcd65c7dfe8ddb03f016a9a5 2021-04-16 09:44:06 From: MH Date: 2021-04-16 09:44:06 Subject: [PATCH] WIP on compiler rearchitecting --- diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 37c977fdd1de92cc52c1398305ff9553be0bc4a3..3a07106e8d89de2e295b473c3ebbc6f723135a9c 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -339,8 +339,7 @@ pub struct AliasedSymbol { pub position: InputPosition, pub name: Identifier, pub alias: Identifier, - // Phase 2: symbol resolving - pub definition_id: Option, + pub definition_id: DefinitionId, } #[derive(Debug, Clone)] @@ -349,12 +348,7 @@ pub struct ImportSymbols { // Phase 1: parser pub span: InputSpan, pub module_name: Vec, - // Phase 2: module resolving pub module_id: Option, - // Phase 1&2 - // if symbols is empty, then we implicitly import all symbols without any - // aliases for them. If it is not empty, then symbols are explicitly - // specified, and optionally given an alias. pub symbols: Vec, } diff --git a/src/protocol/lexer2.rs b/src/protocol/lexer2.rs index 52d6b62b3674fd8f2b1b682c25a686dd95e81b10..4ccf5b8e8601ef99d96b2b8acb77d5ed5304ef08 100644 --- a/src/protocol/lexer2.rs +++ b/src/protocol/lexer2.rs @@ -220,38 +220,38 @@ impl PassPreSymbol { // Retrieve identifier of definition let (identifier_text, identifier_span) = consume_ident(&module.source, &mut iter)?; let ident_text = ctx.pool.intern(identifier_text); - let identifier = Identifier{ span: identifier_span, value: ident_text }; + let identifier = Identifier{ span: identifier_span, value: ident_text.clone() }; // Reserve space in AST for definition and add it to the symbol table - let symbol_definition; + let definition_class; let ast_definition_id; match kw { KeywordDefinition::Struct => { let struct_def_id = ctx.heap.alloc_struct_definition(|this| { StructDefinition::new_empty(this, definition_span, identifier) }); - symbol_definition = SymbolDefinition::Struct(struct_def_id); + definition_class = DefinitionClass::Struct; ast_definition_id = struct_def_id.upcast(); }, KeywordDefinition::Enum => { let enum_def_id = ctx.heap.alloc_enum_definition(|this| { EnumDefinition::new_empty(this, definition_span, identifier) }); - symbol_definition = SymbolDefinition::Enum(enum_def_id); + definition_class = DefinitionClass::Enum; ast_definition_id = enum_def_id.upcast(); }, KeywordDefinition::Union => { let union_def_id = ctx.heap.alloc_union_definition(|this| { UnionDefinition::new_empty(this, definition_span, identifier) }); - symbol_definition = SymbolDefinition::Union(union_def_id); + definition_class = DefinitionClass::Union; ast_definition_id = union_def_id.upcast() }, KeywordDefinition::Function => { let func_def_id = ctx.heap.alloc_function_definition(|this| { FunctionDefinition::new_empty(this, definition_span, identifier) }); - symbol_definition = SymbolDefinition::Function(func_def_id); + definition_class = DefinitionClass::Function; ast_definition_id = func_def_id.upcast(); }, KeywordDefinition::Primitive | KeywordDefinition::Composite => { @@ -263,19 +263,22 @@ impl PassPreSymbol { let comp_def_id = ctx.heap.alloc_component_definition(|this| { ComponentDefinition::new_empty(this, definition_span, component_variant, identifier) }); - symbol_definition = SymbolDefinition::Component(comp_def_id); + definition_class = DefinitionClass::Component; ast_definition_id = comp_def_id.upcast(); } } let symbol = Symbol{ - defined_in_module: module.root_id, - defined_in_scope: SymbolScope::Module(module.root_id), - definition_span, - identifier_span, - introduced_at: None, - name: definition_ident, - definition: symbol_definition + name: ident_text, + data: SymbolVariant::Definition(SymbolDefinition{ + defined_in_module: module.root_id, + defined_in_scope: SymbolScope::Module(module.root_id), + definition_span, + identifier_span, + introduced_at: None, + class: definition_class, + definition_id: ast_definition_id, + }), }; self.symbols.push(symbol); self.definitions.push(ast_definition_id); @@ -357,16 +360,57 @@ impl PassImport { this, span: import_span, module_name: Identifier{ span: module_name_span, value: module_name }, - alias: Identifier{ span: alias_span, value: alias }, + alias: Identifier{ span: alias_span, value: alias.clone() }, module_id: target_root_id })); ctx.symbols.insert_symbol(SymbolScope::Module(module.root_id), Symbol{ - defined_in_module: target_root_id, - defined_in_scope: SymbolScope::Module(target_root_id), - definition_span - }) + name: alias, + data: SymbolVariant::Module(SymbolModule{ + root_id: target_root_id, + introduced_at: import_id, + }), + }); } else if Some(TokenKind::ColonColon) == next { + fn consume_symbol_and_maybe_alias<'a>( + source: &'a InputSource, iter: &mut TokenIter, in_scope: SymbolScope, ctx: &Ctx + ) -> Result<(&'a [u8], InputSpan, Option<(&'a [u8], InputSpan)>), ParseError> { + // Consume symbol and make sure it points to something valid + let (symbol, symbol_span) = consume_ident(source, iter)?; + let target = ctx.symbols.get_symbol_by_name_defined_in_scope(in_scope, symbol); + if target.is_none() { + + } + + if peek_ident(source, iter) == b"as" { + // Consume alias + iter.consume(); + let (alias, alias_span) = consume_ident(source, iter)?; + Ok((symbol, symbol_span, Some((alias, alias_span)))) + } else { + Ok((symbol, symbol_span, None)) + } + } + iter.consume(); + + let next = iter.next(); + if Some(TokenKind::Ident) = next { + // Importing a single symbol + iter.consume(); + let (symbol_text, symbol_span, maybe_alias) = consume_symbol_and_maybe_alias(&module.source, &mut iter)?; + let target_symbol = ctx.symbols.get_symbol_by_name_defined_in_scope( + SymbolScope::Module(target_root_id)) + } else if Some(TokenKind::OpenCurly) = next { + // Importing multiple symbols + iter.consume(); + } else if Some(TokenKind::Star) = next { + // Import all symbols from the module + iter.consume(); + } else { + return Err(ParseError::new_error_str_at_pos( + &module.source, iter.last_valid_pos(), "expected symbol name, '{' or '*'" + )); + } } else { // Assume implicit alias, then check if we get the semicolon next let module_name_str = module_name.as_str(); @@ -386,7 +430,7 @@ impl PassImport { fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { let (_, mut span) = consume_ident(source, iter)?; while let Some(TokenKind::Dot) = iter.next() { - consume_dot(source, iter)?; + iter.consume(); let (_, new_span) = consume_ident(source, iter)?; span.end = new_span.end; } @@ -407,9 +451,14 @@ fn consume_domain_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Re Ok((source.section(span.begin, span.end), span)) } -fn consume_dot<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(), ParseError> { - if Some(TokenKind::Dot) != iter.next() { - return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a dot")); +/// Consumes a specific expected token. Be careful to only call this with tokens that do not have a +/// variable length. +fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<(), ParseError> { + if Some(expected) != iter.next() { + return Err(ParseError::new_error_at_pos( + source, iter.last_valid_pos(), + format!("expected '{}'", expected.token_chars()) + )); } iter.consume(); Ok(()) diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs index f2f6839a73bd6eedb2c0390b57140d2006a52598..bc40c7b5b9f2666321f7903f2e98c7246d7bbd21 100644 --- a/src/protocol/parser/symbol_table2.rs +++ b/src/protocol/parser/symbol_table2.rs @@ -32,6 +32,27 @@ pub enum SymbolClass { Component } +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum DefinitionClass { + Struct, + Enum, + Union, + Function, + Component, +} + +impl DefinitionClass { + fn as_symbol_class(&self) -> SymbolClass { + match self { + DefinitionClass::Struct => SymbolClass::Struct, + DefinitionClass::Enum => SymbolClass::Enum, + DefinitionClass::Union => SymbolClass::Union, + DefinitionClass::Function => SymbolClass::Function, + DefinitionClass::Component => SymbolClass::Component, + } + } +} + struct ScopedSymbols { scope: SymbolScope, parent_scope: Option, @@ -39,13 +60,16 @@ struct ScopedSymbols { symbols: Vec, } -pub enum SymbolDefinition { - Module(RootId), - Struct(StructDefinitionId), - Enum(EnumDefinitionId), - Union(UnionDefinitionId), - Function(FunctionDefinitionId), - Component(ComponentDefinitionId), +impl ScopedSymbols { + fn get_symbol<'a>(&'a self, name: &StringRef) -> Option<&'a Symbol> { + for symbol in self.symbols.iter() { + if symbol.name == *name { + return Some(symbol); + } + } + + None + } } impl SymbolDefinition { @@ -64,22 +88,43 @@ impl SymbolDefinition { } } -pub enum SymbolData { - +pub struct SymbolModule { + pub root_id: RootId, + pub introduced_at: ImportId, } -pub struct Symbol { - // Definition location (may be different from the scope/module in which it - // is used if the symbol is imported) +pub struct SymbolDefinition { + // Definition location (not necessarily the place where the symbol + // is introduced, as it may be imported) pub defined_in_module: RootId, pub defined_in_scope: SymbolScope, - pub definition_span: InputSpan, // full span of definition, not just the name + pub definition_span: InputSpan, // full span of definition pub identifier_span: InputSpan, // span of just the identifier - // Introduction location (if imported instead of defined) - pub introduced_at: Option, - // Symbol properties + // Location where the symbol is introduced in its scope + pub imported_at: Option, + // Definition in the heap, with a utility enum to determine its + // class if the ID is not needed. + pub class: DefinitionClass, + pub definition_id: DefinitionId, +} + +pub enum SymbolVariant { + Module(SymbolModule), + Definition(SymbolDefinition), +} + +pub struct Symbol { pub name: StringRef<'static>, - pub definition: SymbolDefinition, + pub data: SymbolVariant, +} + +impl Symbol { + fn class(&self) -> SymbolClass { + match &self.data { + SymbolVariant::Module(_) => SymbolClass::Module, + SymbolVariant::Definition(data) => data.class.as_symbol_class(), + } + } } pub struct SymbolTable { @@ -158,11 +203,53 @@ impl SymbolTable { Ok(()) } - /// Retrieves a particular scope. As this will be called by the compiler to - /// retrieve scopes that MUST exist, this function will panic if the - /// indicated scope does not exist. - pub(crate) fn get_scope_by_id(&mut self, scope: &SymbolScope) -> &mut ScopedSymbols { - debug_assert!(self.scope_lookup.contains_key(scope), "retrieving scope {:?}, but it doesn't exist", scope); - self.scope_lookup.get_mut(scope).unwrap() + /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The + /// returned symbol may both be imported as defined within any of the searched scopes. + pub(crate) fn get_symbol_by_name( + &self, mut in_scope: SymbolScope, name: &[u8] + ) -> Option<&Symbol> { + let string_ref = StringRef::new(name); + loop { + let scope = self.scope_lookup.get(&in_scope); + if scope.is_none() { + return None; + } + let scope = scope.unwrap(); + + if let Some(symbol) = scope.get_symbol(&string_ref) { + return Some(symbol); + } else { + // Could not find symbol in current scope, seek in the parent scope if it exists + match &scope.parent_scope { + Some(parent_scope) => { in_scope = *parent_scope; }, + None => return None, + } + } + } + } + + /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The + /// returned symbol must be defined within any of the searched scopes and may not be imported. + /// In case such an imported symbol exists then this function still returns `None`. + pub(crate) fn get_symbol_by_name_defined_in_scope( + &self, in_scope: SymbolScope, name: &[u8] + ) -> Option<&Symbol> { + match self.get_symbol_by_name(in_scope, name) { + Some(symbol) => { + match &symbol.data { + SymbolVariant::Module(_) => { + None // in-scope modules are always imported + }, + SymbolVariant::Definition(variant) => { + if variant.imported_at.is_some() { + None + } else { + Some(symbol) + } + } + } + }, + None => None, + } } } \ No newline at end of file diff --git a/src/protocol/tokenizer/mod.rs b/src/protocol/tokenizer/mod.rs index 096cd3e853ea3e20b6c6116c2e1d23e6bfb6201e..371138f42ebda9441a93b236428802eba6b6938f 100644 --- a/src/protocol/tokenizer/mod.rs +++ b/src/protocol/tokenizer/mod.rs @@ -82,10 +82,14 @@ pub(crate) enum TokenKind { } impl TokenKind { + /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is + /// the case for tokens of variable length (e.g. an identifier). fn has_span_end(&self) -> bool { return *self <= TokenKind::BlockComment } + /// Returns the number of characters associated with the token. May only be called on tokens + /// that do not have a variable length. fn num_characters(&self) -> u32 { debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); if *self <= TokenKind::Equal { @@ -96,6 +100,67 @@ impl TokenKind { 3 } } + + /// Returns the characters that are represented by the token, may only be called on tokens that + /// do not have a variable length. + pub fn token_chars(&self) -> &'static str { + debug_assert!(!self.has_span_end() && *self != TokenKind::SpanEnd); + use TokenKind as TK; + match self { + TK::Exclamation => "!", + TK::Question => "?", + TK::Pound => "#", + TK::OpenAngle => "<", + TK::OpenCurly => "{", + TK::OpenParen => "(", + TK::OpenSquare => "[", + TK::CloseAngle => ">", + TK::CloseCurly => "}", + TK::CloseParen => ")", + TK::CloseSquare => "]", + TK::Colon => ":", + TK::Comma => ",", + TK::Dot => ".", + TK::SemiColon => ";", + TK::Quote => "'", + TK::DoubleQuote => "\"", + TK::At => "@", + TK::Plus => "+", + TK::Minus => "-", + TK::Star => "*", + TK::Slash => "/", + TK::Percent => "%", + TK::Caret => "^", + TK::And => "&", + TK::Or => "|", + TK::Tilde => "~", + TK::Equal => "=", + TK::ColonColon => "::", + TK::DotDot => "..", + TK::ArrowRight => "->", + TK::PlusPlus => "++", + TK::PlusEquals => "+=", + TK::MinusMinus => "--", + TK::MinusEquals => "-=", + TK::StarEquals => "*=", + TK::SlashEquals => "/=", + TK::PercentEquals => "%=", + TK::CaretEquals => "^=", + TK::AndAnd => "&&", + TK::AndEquals => "&=", + TK::OrOr => "||", + TK::OrEquals => "|=", + TK::EqualEqual => "==", + TK::NotEqual => "!=", + TK::ShiftLeft => "<<", + TK::ShiftRight => ">>", + TK::ShiftLeftEquals => "<<=", + TK::ShiftRightEquals => ">>=", + // Lets keep these in explicitly for now, in case we want to add more symbols + TK::Ident | TK::Pragma | TK::Integer | TK::String | TK::Character | + TK::LineComment | TK::BlockComment | TK::SpanEnd => unreachable!(), + } + } } pub(crate) struct Token {