diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index de59ceb8784acf851202f9c4f8aa0cfb0ebfa85c..ef3c034496a8bbbe65a43dbf1993211cfa51d449 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -6,11 +6,14 @@ use std::fmt::{Debug, Display, Formatter}; use std::ops::{Index, IndexMut}; use super::arena::{Arena, Id}; -// use super::containers::StringAllocator; - -// TODO: @cleanup, transform wrapping types into type aliases where possible use crate::protocol::inputsource::*; +/// Global limits to the AST, should be checked by lexer and parser. Some are +/// arbitrary +const MAX_LEVEL: usize = 128; +const MAX_NAMESPACES: usize = 64; + + /// Helper macro that defines a type alias for a AST element ID. In this case /// only used to alias the `Id` types. macro_rules! define_aliased_ast_id { @@ -632,6 +635,133 @@ impl PartialEq for Identifier { } } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum NamespacedIdentifierPart { + // Regular identifier + Identifier{start: u16, end: u16}, + // Polyargs associated with a preceding identifier + PolyArgs{start: u16, end: u16}, +} + +impl NamespacedIdentifierPart { + fn is_identifier(&self) -> bool { + match self { + NamespacedIdentifierPart::Identifier{..} => true, + NamespacedIdentifierPart::PolyArgs{..} => false, + } + } + + fn as_identifier(&self) -> (u16, u16) { + match self { + NamespacedIdentifierPart::Identifier{start, end} => (*start, *end), + NamespacedIdentifierPart::PolyArgs{..} => { + unreachable!("Tried to obtain {:?} as Identifier", self); + } + } + } + + fn as_poly_args(&self) -> (u16, u16) { + match self { + NamespacedIdentifierPart::PolyArgs{start, end} => (*start, *end), + NamespacedIdentifierPart::Identifier{..} => { + unreachable!("Tried to obtain {:?} as PolyArgs", self) + } + } + } +} + +/// An identifier with optional namespaces and polymorphic variables +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct NamespacedIdentifier2 { + pub position: 
InputPosition, + pub value: Vec, // Full name as it resides in the input source + pub poly_args: Vec, // All poly args littered throughout the namespaced identifier + pub parts: Vec, // Indices into value/poly_args +} + +impl NamespacedIdentifier2 { + pub fn iter(&self) -> NamespacedIdentifier2Iter { + return NamespacedIdentifier2Iter{ + identifier: self, + element_idx: 0 + } + } +} + +impl PartialEq for NamespacedIdentifier2 { + fn eq(&self, other: &Self) -> bool { + return self.value == other.value + } +} + +impl PartialEq for NamespacedIdentifier2 { + fn eq(&self, other: &Identifier) -> bool { + return self.value == other.value + } +} + +#[derive(Debug)] +pub struct NamespacedIdentifier2Iter<'a> { + identifier: &'a NamespacedIdentifier2, + element_idx: usize, +} + +impl<'a> Iterator for NamespacedIdentifier2Iter<'a> { + type Item = (&'a [u8], Option<&'a [ParserTypeId]>); + fn next(&mut self) -> Option { + match self.get(self.element_idx) { + Some(result) => { + self.element_idx += 1; + Some(result) + }, + None => None + } + } +} + +impl<'a> NamespacedIdentifier2Iter<'a> { + pub fn num_returned(&self) -> usize { + return self.element_idx; + } + + pub fn num_remaining(&self) -> usize { + return self.identifier.parts.len() - self.element_idx; + } + + pub fn get(&self, idx: usize) -> Option<::Item> { + if idx >= self.identifier.parts.len() { + return None + } + + let cur_part = &self.identifier.parts[idx]; + let next_part = self.identifier.parts.get(idx); + + let (ident_start, ident_end) = cur_part.as_identifier(); + let poly_slice = match next_part { + Some(part) => match part { + NamespacedIdentifierPart::Identifier{..} => None, + NamespacedIdentifierPart::PolyArgs{start, end} => Some( + &self.identifier.poly_args[*start as usize..*end as usize] + ), + }, + None => None + }; + + Some(( + &self.identifier.value[ident_start as usize..ident_end as usize], + poly_slice + )) + } + + pub fn prev(&self) -> Option<::Item> { + if self.element_idx == 0 { + return None; + } 
+ + self.get(self.element_idx - 1) + } +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct NamespacedIdentifier { pub position: InputPosition, @@ -771,12 +901,7 @@ pub struct ParserType { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct SymbolicParserType { // Phase 1: parser - pub identifier: NamespacedIdentifier, - /// The user-specified polymorphic arguments. Zero-length implies that the - /// user did not specify any of them, and they're either not needed or all - /// need to be inferred. Otherwise the number of polymorphic arguments must - /// match those of the corresponding definition - pub poly_args: Vec, + pub identifier: NamespacedIdentifier2, // Phase 2: validation/linking (for types in function/component bodies) and // type table construction (for embedded types of structs/unions) pub variant: Option @@ -859,7 +984,7 @@ pub enum PrimitiveType { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct PrimitiveSymbolic { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, + pub(crate) identifier: NamespacedIdentifier, // TODO: @remove at some point, also remove NSIdent itself // Phase 2: typing pub(crate) definition: Option } @@ -997,8 +1122,7 @@ pub struct LiteralStructField { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralStruct { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, - pub(crate) poly_args: Vec, + pub(crate) identifier: NamespacedIdentifier2, pub(crate) fields: Vec, // Phase 2: linker pub(crate) definition: Option @@ -1007,7 +1131,7 @@ pub struct LiteralStruct { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralEnum { // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, + pub(crate) identifier: NamespacedIdentifier2, pub(crate) poly_args: Vec, // Phase 2: linker pub(crate) definition: Option, @@ -1025,7 +1149,7 @@ pub enum Method { #[derive(Debug, Clone, 
serde::Serialize, serde::Deserialize)] pub struct MethodSymbolic { - pub(crate) identifier: NamespacedIdentifier, + pub(crate) identifier: NamespacedIdentifier2, pub(crate) definition: Option } @@ -2461,7 +2585,7 @@ pub struct VariableExpression { pub this: VariableExpressionId, // Phase 1: parser pub position: InputPosition, - pub identifier: NamespacedIdentifier, + pub identifier: NamespacedIdentifier2, // Phase 2: linker pub declaration: Option, pub parent: ExpressionParent, diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index ed10f5f71043a5f1c545eb5c289c9eb5422c2786..9eef0fdef53f2aa0a6e767c29e3ac84fd9d2a4ed 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -194,12 +194,6 @@ impl Lexer<'_> { let next = self.source.lookahead(keyword.len()); if next.is_none() { return true; } return !is_ident_rest(next); - - if let Some(next) = self.source.lookahead(keyword.len()) { - !(next >= b'A' && next <= b'Z' || next >= b'a' && next <= b'z') - } else { - true - } } fn consume_keyword(&mut self, keyword: &[u8]) -> Result<(), ParseError2> { let len = keyword.len(); @@ -477,6 +471,71 @@ impl Lexer<'_> { Ok(()) } + fn consume_namespaced_identifier2(&mut self, h: &mut Heap) -> Result { + if self.has_reserved() { + return Err(self.error_at_pos("Encountered reserved keyword")); + } + + // Consumes a part of the namespaced identifier: an identifier that may + // be followed by polymorphic arguments. Both are recorded in `ident`. + fn consume_part( + l: &mut Lexer, h: &mut Heap, ident: &mut NamespacedIdentifier2, + backup_pos: &mut InputPosition + ) -> Result<(), ParseError2> { + // Consume identifier + let ident_start = ident.value.len(); + ident.value.extend(l.consume_ident()?); + ident.parts.push(NamespacedIdentifierPart::Identifier{ + start: ident_start as u16, + end: ident.value.len() as u16 + }); + + // Maybe consume polymorphic args. + *backup_pos = l.source.pos(); + l.consume_whitespace(false)?; + let had_poly_args = match l.consume_polymorphic_args(h, true)? 
{ + Some(args) => { + let poly_start = ident.poly_args.len(); + ident.poly_args.extend(args); + + ident.parts.push(NamespacedIdentifierPart::PolyArgs{ + start: poly_start as u16, + end: ident.poly_args.len() as u16, + }); + + *backup_pos = l.source.pos(); + }, + None => {} + }; + + Ok(had_poly_args) + } + + let mut ident = NamespacedIdentifier2{ + position: self.source.pos(), + value: Vec::new(), + poly_args: Vec::new(), + parts: Vec::new(), + }; + + // Keep consuming parts separated by "::". We don't consume the trailing + // whitespace, hence we keep a backup position at the end of the last + // valid part of the namespaced identifier (i.e. the last ident, or the + // last encountered polymorphic arguments). + let mut backup_pos = self.source.pos(); + consume_part(self, h, &mut ident, &mut backup_pos)?; + self.consume_whitespace(false)?; + while self.has_string(b"::") { + self.consume_string(b"::")?; + self.consume_whitespace(false)?; + consume_part(self, h, &mut ident, &mut backup_pos)?; + self.consume_whitespace(false)?; + } + + self.source.seek(backup_pos); + Ok(ident) + } + // Types and type annotations /// Consumes a type definition. 
When called the input position should be at @@ -544,7 +603,7 @@ impl Lexer<'_> { // TODO: @cleanup: not particularly neat to have this special case // where we enforce polyargs in the parser-phase self.consume_keyword(b"in")?; - let poly_args = self.consume_polymorphic_args(h, allow_inference)?; + let poly_args = self.consume_polymorphic_args(h, allow_inference)?.unwrap_or_default(); let poly_arg = reduce_port_poly_args(h, &pos, poly_args) .map_err(|infer_error| { let msg = if infer_error { @@ -557,7 +616,7 @@ impl Lexer<'_> { ParserTypeVariant::Input(poly_arg) } else if self.has_keyword(b"out") { self.consume_keyword(b"out")?; - let poly_args = self.consume_polymorphic_args(h, allow_inference)?; + let poly_args = self.consume_polymorphic_args(h, allow_inference)?.unwrap_or_default(); let poly_arg = reduce_port_poly_args(h, &pos, poly_args) .map_err(|infer_error| { let msg = if infer_error { @@ -570,9 +629,8 @@ impl Lexer<'_> { ParserTypeVariant::Output(poly_arg) } else { // Must be a symbolic type - let identifier = self.consume_namespaced_identifier()?; - let poly_args = self.consume_polymorphic_args(h, allow_inference)?; - ParserTypeVariant::Symbolic(SymbolicParserType{identifier, poly_args, variant: None}) + let identifier = self.consume_namespaced_identifier2(h)?; + ParserTypeVariant::Symbolic(SymbolicParserType{identifier, variant: None}) }; // If the type was a basic type (not supporting polymorphic type @@ -693,18 +751,11 @@ impl Lexer<'_> { /// Polymorphic arguments represent the specification of the parametric /// types of a polymorphic type: they specify the value of the polymorphic /// type's polymorphic variables. 
- fn consume_polymorphic_args(&mut self, h: &mut Heap, allow_inference: bool) -> Result, ParseError2> { - let backup_pos = self.source.pos(); - match self.consume_comma_separated( + fn consume_polymorphic_args(&mut self, h: &mut Heap, allow_inference: bool) -> Result>, ParseError2> { + self.consume_comma_separated( h, b'<', b'>', "Expected the end of the polymorphic argument list", |lexer, heap| lexer.consume_type2(heap, allow_inference) - )? { - Some(poly_args) => Ok(poly_args), - None => { - self.source.seek(backup_pos); - Ok(vec![]) - } - } + ) } /// Consumes polymorphic variables. These are identifiers that are used @@ -1477,9 +1528,7 @@ impl Lexer<'_> { // Consume identifier and polymorphic arguments debug_log!("consume_struct_literal_expression: {}", debug_line!(self.source)); let position = self.source.pos(); - let identifier = self.consume_namespaced_identifier()?; - self.consume_whitespace(false)?; - let poly_args = self.consume_polymorphic_args(h, true)?; + let identifier = self.consume_namespaced_identifier2(h)?; self.consume_whitespace(false)?; // Consume fields @@ -1507,7 +1556,6 @@ impl Lexer<'_> { position, value: Literal::Struct(LiteralStruct{ identifier, - poly_args, fields, definition: None, }), @@ -1557,7 +1605,7 @@ impl Lexer<'_> { self.consume_keyword(b"create")?; method = Method::Create; } else { - let identifier = self.consume_namespaced_identifier()?; + let identifier = self.consume_namespaced_identifier2(h)?; method = Method::Symbolic(MethodSymbolic{ identifier, definition: None @@ -1566,7 +1614,7 @@ impl Lexer<'_> { // Consume polymorphic arguments self.consume_whitespace(false)?; - let poly_args = self.consume_polymorphic_args(h, true)?; + let poly_args = self.consume_polymorphic_args(h, true)?.unwrap_or_default(); // Consume arguments to call self.consume_whitespace(false)?; @@ -1779,7 +1827,7 @@ impl Lexer<'_> { let expect_whitespace = self.source.next() != Some(b'<'); self.consume_whitespace(expect_whitespace)?; - let poly_args = 
self.consume_polymorphic_args(h, true)?; + let poly_args = self.consume_polymorphic_args(h, true)?.unwrap_or_default(); let poly_arg_id = match poly_args.len() { 0 => h.alloc_parser_type(|this| ParserType{ this, pos: position.clone(), variant: ParserTypeVariant::Inferred, diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index 0a9c6b3e2c8069226cb549db8c4f2ca3e41ee67d..b6054bef4c0b56f3ab45e32f1f942a55d16974b8 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -308,20 +308,19 @@ impl SymbolTable { self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() }) } - /// Resolves a namespaced symbol. It will try to go as far as possible in - /// actually finding a definition or a namespace. So a namespace might be - /// resolved, after it which it finds an actual definition. It may be that - /// the namespaced identifier has more elements that should be checked - /// (i.e. an enum variant, or simply an erroneous instance of too many - /// chained identifiers). This function will return None if nothing could be - /// resolved at all. + /// Resolves a namespaced symbol. This method will go as far as possible in + /// going to the right symbol. It will halt the search when: + /// 1. Polymorphic arguments are encountered on the identifier. + /// 2. A non-namespace symbol is encountered. + /// 3. 
A part of the identifier couldn't be resolved to anything pub(crate) fn resolve_namespaced_symbol<'t, 'i>( - &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier - ) -> Option<(&SymbolValue, NamespacedIdentifierIter<'i>)> { + &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier2 + ) -> (Option<&'t Symbol>, &'i NamespacedIdentifier2Iter) { let mut iter = identifier.iter(); let mut symbol: Option<&SymbolValue> = None; let mut within_module_id = root_module_id; - while let Some(partial) = iter.next() { + + while let Some((partial, poly_args)) = iter.next() { // Lookup the symbol within the currently iterated upon module let lookup_key = SymbolKey{ module_id: within_module_id, symbol_name: Vec::from(partial) }; let new_symbol = self.symbol_lookup.get(&lookup_key); @@ -329,7 +328,7 @@ impl SymbolTable { match new_symbol { None => { // Can't find anything - break; + break; }, Some(new_symbol) => { // Found something, but if we already moved to another @@ -339,21 +338,6 @@ impl SymbolTable { match &new_symbol.symbol { Symbol::Namespace(new_root_id) => { if root_module_id != within_module_id { - // Don't jump from module to module, keep the - // old symbol (which must be a Namespace) and - // break - debug_assert!(symbol.is_some()); - debug_assert!(symbol.unwrap().is_namespace()); - debug_assert!(iter.num_returned() > 1); - - // For handling this error, we need to revert - // the iterator by one - let to_skip = iter.num_returned() - 1; - iter = identifier.iter(); - for _ in 0..to_skip { iter.next(); } - break; - } - within_module_id = *new_root_id; symbol = Some(new_symbol); }, @@ -381,8 +365,8 @@ impl SymbolTable { } match symbol { - None => None, - Some(symbol) => Some((symbol, iter)) + None => Ok(None), + Some(symbol) => Ok(Some((symbol, iter))) } }