diff --git a/src/collections/scoped_buffer.rs b/src/collections/scoped_buffer.rs index d3eea0f78a8c985c0cab8b599d4d5f9eaa715f2e..f6f62b77ed4bca0048e38c9acbb44f1261537656 100644 --- a/src/collections/scoped_buffer.rs +++ b/src/collections/scoped_buffer.rs @@ -5,8 +5,9 @@ /// procedure, we push stuff into the buffer. At the end we take out what we /// have put in. /// -/// It is unsafe because we're using pointers to take care of borrowing rules. -/// The correctness of use is checked in debug mode. +/// It is unsafe because we're using pointers to circumvent borrowing rules in +/// the name of code cleanliness. The correctness of use is checked in debug +/// mode. /// The buffer itself. This struct should be the shared buffer. The type `T` is /// intentionally `Copy` such that it can be copied out and the underlying @@ -15,6 +16,9 @@ pub(crate) struct ScopedBuffer { pub inner: Vec, } +/// A section of the buffer. Keeps track of where we started the section. When +/// done with the section one must call `into_vec` or `forget` to remove the +/// section from the underlying buffer. pub(crate) struct ScopedSection { inner: *mut Vec, start_size: u32, @@ -48,11 +52,17 @@ impl ScopedSection { #[inline] pub(crate) fn push(&mut self, value: T) { let vec = unsafe{&mut *self.inner}; - debug_assert!_eq(vec.len(), self.cur_size as usize, "trying to push onto section, but size is larger than expected"); + debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to push onto section, but size is larger than expected"); vec.push(value); if cfg!(debug_assertions) { self.cur_size += 1; } } + pub(crate) fn len(&self) -> usize { + let vec = unsafe{&mut *self.inner}; + debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to get section length, but size is larger than expected"); + return vec.len() - self.start_size; + } + #[inline] pub(crate) fn forget(self) { let vec = unsafe{&mut *self.inner}; @@ -70,6 +80,15 @@ impl ScopedSection { } } +impl std::ops::Index for ScopedSection { + type Output = T; + + fn index(&self, idx: usize) -> &Self::Output { + let vec = unsafe{&*self.inner}; + return vec[self.start_size as usize + idx] + } +} + #[cfg(debug_assertions)] impl Drop for ScopedBuffer { fn drop(&mut self) { diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 9555ae70cd547e1518596c76def64ad49b8a6c3f..9ecb7ade96ca32cfdefc70a7e43b00fdd04b6763 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -290,6 +290,13 @@ pub enum Import { } impl Import { + pub(crate) fn span(&self) -> InputSpan { + match self { + Import::Module(v) => v.span, + Import::Symbols(v) => v.span, + } + } + pub(crate) fn as_module(&self) -> &ImportModule { match self { Import::Module(m) => m, @@ -351,235 +358,7 @@ impl PartialEq for Identifier { impl Display for Identifier { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // A source identifier is in ASCII range. 
- write!(f, "{}", String::from_utf8_lossy(&self.value)) - } -} - -#[derive(Debug, Clone)] -pub enum NamespacedIdentifierPart { - // Regular identifier - Identifier{start: u16, end: u16}, - // Polyargs associated with a preceding identifier - PolyArgs{start: u16, end: u16}, -} - -impl NamespacedIdentifierPart { - pub(crate) fn is_identifier(&self) -> bool { - match self { - NamespacedIdentifierPart::Identifier{..} => true, - NamespacedIdentifierPart::PolyArgs{..} => false, - } - } - - pub(crate) fn as_identifier(&self) -> (u16, u16) { - match self { - NamespacedIdentifierPart::Identifier{start, end} => (*start, *end), - NamespacedIdentifierPart::PolyArgs{..} => { - unreachable!("Tried to obtain {:?} as Identifier", self); - } - } - } - - pub(crate) fn as_poly_args(&self) -> (u16, u16) { - match self { - NamespacedIdentifierPart::PolyArgs{start, end} => (*start, *end), - NamespacedIdentifierPart::Identifier{..} => { - unreachable!("Tried to obtain {:?} as PolyArgs", self) - } - } - } -} - -/// An identifier with optional namespaces and polymorphic variables. Note that -/// we allow each identifier to be followed by polymorphic arguments during the -/// parsing phase (e.g. Foo::Bar::Qux). But in our current language -/// implementation we can only have valid namespaced identifier that contain one -/// set of polymorphic arguments at the appropriate position. -/// TODO: @tokens Reimplement/rename once we have a tokenizer -#[derive(Debug, Clone)] -pub struct NamespacedIdentifier { - pub position: InputPosition, - pub value: Vec, // Full name as it resides in the input source - pub poly_args: Vec, // All poly args littered throughout the namespaced identifier - pub parts: Vec, // Indices into value/poly_args -} - -impl NamespacedIdentifier { - /// Returns the identifier value without any of the specific polymorphic - /// arguments. 
- pub fn strip_poly_args(&self) -> Vec { - debug_assert!(!self.parts.is_empty() && self.parts[0].is_identifier()); - - let mut result = Vec::with_capacity(self.value.len()); - let mut iter = self.iter(); - let (first_ident, _) = iter.next().unwrap(); - result.extend(first_ident); - - for (ident, _) in iter.next() { - result.push(b':'); - result.push(b':'); - result.extend(ident); - } - - result - } - - /// Returns an iterator of the elements in the namespaced identifier - pub fn iter(&self) -> NamespacedIdentifierIter { - return NamespacedIdentifierIter{ - identifier: self, - element_idx: 0 - } - } - - pub fn get_poly_args(&self) -> Option<&[ParserTypeId]> { - let has_poly_args = self.parts.iter().any(|v| !v.is_identifier()); - if has_poly_args { - Some(&self.poly_args) - } else { - None - } - } - - // Check if two namespaced identifiers match eachother when not considering - // the polymorphic arguments - pub fn matches_namespaced_identifier(&self, other: &Self) -> bool { - let mut iter_self = self.iter(); - let mut iter_other = other.iter(); - - loop { - let val_self = iter_self.next(); - let val_other = iter_other.next(); - if val_self.is_some() != val_other.is_some() { - // One is longer than the other - return false; - } - if val_self.is_none() { - // Both are none - return true; - } - - // Both are something - let (val_self, _) = val_self.unwrap(); - let (val_other, _) = val_other.unwrap(); - if val_self != val_other { return false; } - } - } - - // Check if the namespaced identifier matches an identifier when not - // considering the polymorphic arguments - pub fn matches_identifier(&self, other: &Identifier) -> bool { - let mut iter = self.iter(); - let (first_ident, _) = iter.next().unwrap(); - if first_ident != other.value { - return false; - } - - if iter.next().is_some() { - return false; - } - - return true; - } -} - -/// Iterator over elements of the namespaced identifier. The element index will -/// only ever be at the start of an identifier element. -#[derive(Debug)] -pub struct NamespacedIdentifierIter<'a> { - identifier: &'a NamespacedIdentifier, - element_idx: usize, -} - -impl<'a> Iterator for NamespacedIdentifierIter<'a> { - type Item = (&'a [u8], Option<&'a [ParserTypeId]>); - fn next(&mut self) -> Option { - match self.get(self.element_idx) { - Some((ident, poly)) => { - self.element_idx += 1; - if poly.is_some() { - self.element_idx += 1; - } - Some((ident, poly)) - }, - None => None - } - } -} - -impl<'a> NamespacedIdentifierIter<'a> { - /// Returns number of parts iterated over, may not correspond to number of - /// times one called `next()` because returning an identifier with - /// polymorphic arguments increments the internal counter by 2. 
- pub fn num_returned(&self) -> usize { - return self.element_idx; - } - - pub fn num_remaining(&self) -> usize { - return self.identifier.parts.len() - self.element_idx; - } - - pub fn returned_section(&self) -> &[u8] { - if self.element_idx == 0 { return &self.identifier.value[0..0]; } - - let last_idx = match &self.identifier.parts[self.element_idx - 1] { - NamespacedIdentifierPart::Identifier{end, ..} => *end, - NamespacedIdentifierPart::PolyArgs{end, ..} => *end, - }; - - return &self.identifier.value[..last_idx as usize]; - } - - /// Returns a specific element from the namespaced identifier - pub fn get(&self, idx: usize) -> Option<::Item> { - if idx >= self.identifier.parts.len() { - return None - } - - let cur_part = &self.identifier.parts[idx]; - let next_part = self.identifier.parts.get(idx + 1); - - let (ident_start, ident_end) = cur_part.as_identifier(); - let poly_slice = match next_part { - Some(part) => match part { - NamespacedIdentifierPart::Identifier{..} => None, - NamespacedIdentifierPart::PolyArgs{start, end} => Some( - &self.identifier.poly_args[*start as usize..*end as usize] - ), - }, - None => None - }; - - Some(( - &self.identifier.value[ident_start as usize..ident_end as usize], - poly_slice - )) - } - - /// Returns the previously returend index into the parts array of the - /// identifier. - pub fn prev_idx(&self) -> Option { - if self.element_idx == 0 { - return None; - }; - - if self.identifier.parts[self.element_idx - 1].is_identifier() { - return Some(self.element_idx - 1); - } - - // Previous part had polymorphic arguments, so the one before that must - // be an identifier (if well formed) - debug_assert!(self.element_idx >= 2 && self.identifier.parts[self.element_idx - 2].is_identifier()); - return Some(self.element_idx - 2) - } - - /// Returns the previously returned result from `next()` - pub fn prev(&self) -> Option<::Item> { - match self.prev_idx() { - None => None, - Some(idx) => self.get(idx) - } + write!(f, "{}", self.value.as_str()) } } @@ -593,6 +372,7 @@ pub enum ParserTypeVariant { Character, String, // Literals (need to get concrete builtin type during typechecking) IntegerLiteral, + // Marker for inference Inferred, // Builtins expecting one subsequent type Array, @@ -631,21 +411,6 @@ pub struct ParserType { pub elements: Vec } -/// SymbolicParserType is the specification of a symbolic type. During the -/// parsing phase we will only store the identifier of the type. During the -/// validation phase we will determine whether it refers to a user-defined type, -/// or a polymorphic argument. After the validation phase it may still be the -/// case that the resulting `variant` will not pass the typechecker. -#[derive(Debug, Clone)] -pub struct SymbolicParserType { - // Phase 1: parser - pub identifier: NamespacedIdentifier, - // Phase 2: validation/linking (for types in function/component bodies) and - // type table construction (for embedded types of structs/unions) - pub poly_args2: Vec, // taken from identifier or inferred - pub variant: Option -} - /// Specifies whether the symbolic type points to an actual user-defined type, /// or whether it points to a polymorphic argument within the definition (e.g. 
/// a defined variable `T var` within a function `int func()` @@ -1000,6 +765,15 @@ impl Definition { _ => panic!("Unable to cast `Definition` to `Function`"), } } + pub fn defined_in(&self) -> RootId { + match self { + Definition::Struct(def) => def.defined_in, + Definition::Enum(def) => def.defined_in, + Definition::Union(def) => def.defined_in, + Definition::Component(def) => def.defined_in, + Definition::Function(def) => def.defined_in, + } + } pub fn identifier(&self) -> &Identifier { match self { Definition::Struct(def) => &def.identifier, @@ -1018,12 +792,11 @@ impl Definition { _ => &EMPTY_VEC, } } - pub fn body(&self) -> StatementId { - // TODO: Fix this + pub fn body(&self) -> BlockStatementId { match self { Definition::Component(com) => com.body, Definition::Function(fun) => fun.body, - _ => panic!("cannot retrieve body (for EnumDefinition or StructDefinition)") + _ => panic!("cannot retrieve body (for EnumDefinition/UnionDefinition or StructDefinition)") } } pub fn poly_vars(&self) -> &Vec { @@ -1047,6 +820,7 @@ pub struct StructFieldDefinition { #[derive(Debug, Clone)] pub struct StructDefinition { pub this: StructDefinitionId, + pub defined_in: RootId, // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, @@ -1056,8 +830,11 @@ pub struct StructDefinition { } impl StructDefinition { - pub(crate) fn new_empty(this: StructDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { - Self{ this, span, identifier, poly_vars, fields: Vec::new() } + pub(crate) fn new_empty( + this: StructDefinitionId, defined_in: RootId, span: InputSpan, + identifier: Identifier, poly_vars: Vec + ) -> Self { + Self{ this, defined_in, span, identifier, poly_vars, fields: Vec::new() } } } @@ -1076,6 +853,7 @@ pub struct EnumVariantDefinition { #[derive(Debug, Clone)] pub struct EnumDefinition { pub this: EnumDefinitionId, + pub defined_in: RootId, // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, @@ -1085,8 +863,11 @@ pub struct EnumDefinition { } impl EnumDefinition { - pub(crate) fn new_empty(this: EnumDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { - Self{ this, span, identifier, poly_vars, variants: Vec::new() } + pub(crate) fn new_empty( + this: EnumDefinitionId, defined_in: RootId, span: InputSpan, + identifier: Identifier, poly_vars: Vec + ) -> Self { + Self{ this, defined_in, span, identifier, poly_vars, variants: Vec::new() } } } @@ -1106,6 +887,7 @@ pub struct UnionVariantDefinition { #[derive(Debug, Clone)] pub struct UnionDefinition { pub this: UnionDefinitionId, + pub defined_in: RootId, // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, @@ -1115,8 +897,11 @@ pub struct UnionDefinition { } impl UnionDefinition { - pub(crate) fn new_empty(this: UnionDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { - Self{ this, span, identifier, poly_vars, variants: Vec::new() } + pub(crate) fn new_empty( + this: UnionDefinitionId, defined_in: RootId, span: InputSpan, + identifier: Identifier, poly_vars: Vec + ) -> Self { + Self{ this, defined_in, span, identifier, poly_vars, variants: Vec::new() } } } @@ -1129,6 +914,7 @@ pub enum ComponentVariant { #[derive(Debug, Clone)] pub struct ComponentDefinition { pub this: ComponentDefinitionId, + pub defined_in: RootId, // Phase 1: symbol scanning pub span: InputSpan, pub variant: ComponentVariant, @@ -1136,15 +922,18 @@ pub struct ComponentDefinition { pub poly_vars: Vec, // Phase 2: parsing pub 
parameters: Vec, - pub body: StatementId, + pub body: BlockStatementId, } impl ComponentDefinition { - pub(crate) fn new_empty(this: ComponentDefinitionId, span: InputSpan, variant: ComponentVariant, identifier: Identifier, poly_vars: Vec) -> Self { + pub(crate) fn new_empty( + this: ComponentDefinitionId, defined_in: RootId, span: InputSpan, + variant: ComponentVariant, identifier: Identifier, poly_vars: Vec + ) -> Self { Self{ - this, span, variant, identifier, poly_vars, + this, defined_in, span, variant, identifier, poly_vars, parameters: Vec::new(), - body: StatementId::new_invalid() + body: BlockStatementId::new_invalid() } } } @@ -1154,6 +943,7 @@ impl ComponentDefinition { #[derive(Debug, Clone)] pub struct FunctionDefinition { pub this: FunctionDefinitionId, + pub defined_in: RootId, // Phase 1: symbol scanning pub builtin: bool, pub span: InputSpan, @@ -1162,18 +952,21 @@ pub struct FunctionDefinition { // Phase 2: parsing pub return_types: Vec, pub parameters: Vec, - pub body: StatementId, + pub body: BlockStatementId, } impl FunctionDefinition { - pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { + pub(crate) fn new_empty( + this: FunctionDefinitionId, defined_in: RootId, span: InputSpan, + identifier: Identifier, poly_vars: Vec + ) -> Self { Self { - this, + this, defined_in, builtin: false, span, identifier, poly_vars, return_types: Vec::new(), parameters: Vec::new(), - body: StatementId::new_invalid(), + body: BlockStatementId::new_invalid(), } } } @@ -1536,9 +1329,8 @@ pub struct IfStatement { #[derive(Debug, Clone)] pub struct EndIfStatement { pub this: EndIfStatementId, - // Phase 2: linker pub start_if: IfStatementId, - pub position: InputPosition, // of corresponding if statement + // Phase 2: linker pub next: Option, } @@ -1557,9 +1349,8 @@ pub struct WhileStatement { #[derive(Debug, Clone)] pub struct EndWhileStatement { pub this: EndWhileStatementId, - // Phase 2: linker pub start_while: WhileStatementId, - pub position: InputPosition, // of corresponding while + // Phase 2: linker pub next: Option, } @@ -1597,9 +1388,8 @@ pub struct SynchronousStatement { #[derive(Debug, Clone)] pub struct EndSynchronousStatement { pub this: EndSynchronousStatementId, - // Phase 2: linker - pub position: InputPosition, // of corresponding sync statement pub start_sync: SynchronousStatementId, + // Phase 2: linker pub next: Option, } diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index 71dce7197addfb878f454c4b608e1b2f7ff3ca67..a3d1f7823e2879160495ca3b31cf569db85b8e33 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -29,13 +29,13 @@ pub(crate) trait Visitor: Sized { fn visit_union_definition(&mut self, _h: &mut Heap, _def: UnionId) -> VisitorResult { Ok(()) } - fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { recursive_component_definition(self, h, def) } - fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { recursive_composite_definition(self, h, def) } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> 
VisitorResult { recursive_primitive_definition(self, h, def) } fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { @@ -260,7 +260,7 @@ fn recursive_symbol_definition( fn recursive_component_definition( this: &mut T, h: &mut Heap, - def: ComponentId, + def: ComponentDefinitionId, ) -> VisitorResult { let component_variant = h[def].variant; match component_variant { @@ -272,23 +272,23 @@ fn recursive_component_definition( fn recursive_composite_definition( this: &mut T, h: &mut Heap, - def: ComponentId, + def: ComponentDefinitionId, ) -> VisitorResult { for ¶m in h[def].parameters.clone().iter() { recursive_parameter_as_variable(this, h, param)?; } - this.visit_statement(h, h[def].body) + this.visit_block_statement(h, h[def].body) } fn recursive_primitive_definition( this: &mut T, h: &mut Heap, - def: ComponentId, + def: ComponentDefinitionId, ) -> VisitorResult { for ¶m in h[def].parameters.clone().iter() { recursive_parameter_as_variable(this, h, param)?; } - this.visit_statement(h, h[def].body) + this.visit_block_statement(h, h[def].body) } fn recursive_function_definition( @@ -374,8 +374,11 @@ fn recursive_if_statement( stmt: IfStatementId, ) -> VisitorResult { this.visit_expression(h, h[stmt].test)?; - this.visit_statement(h, h[stmt].true_body)?; - this.visit_statement(h, h[stmt].false_body) + this.visit_block_statement(h, h[stmt].true_body)?; + if let Some(false_body) = h[stmt].false_body { + this.visit_block_statement(h, false_body)?; + } + Ok(()) } fn recursive_while_statement( @@ -384,7 +387,7 @@ fn recursive_while_statement( stmt: WhileStatementId, ) -> VisitorResult { this.visit_expression(h, h[stmt].test)?; - this.visit_statement(h, h[stmt].body) + this.visit_block_statement(h, h[stmt].body) } fn recursive_synchronous_statement( @@ -396,7 +399,7 @@ fn recursive_synchronous_statement( // for ¶m in h[stmt].parameters.clone().iter() { // recursive_parameter_as_variable(this, h, param)?; // } - this.visit_statement(h, h[stmt].body) + this.visit_block_statement(h, h[stmt].body) } fn recursive_return_statement( @@ -561,14 +564,14 @@ impl NestedSynchronousStatements { } impl Visitor for NestedSynchronousStatements { - fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { assert!(!self.illegal); self.illegal = true; recursive_composite_definition(self, h, def)?; self.illegal = false; Ok(()) } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionDefinitionId) -> VisitorResult { assert!(!self.illegal); self.illegal = true; recursive_function_definition(self, h, def)?; @@ -607,7 +610,7 @@ impl ChannelStatementOccurrences { } impl Visitor for ChannelStatementOccurrences { - fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { assert!(!self.illegal); self.illegal = true; recursive_primitive_definition(self, h, def)?; @@ -644,7 +647,7 @@ impl FunctionStatementReturns { } impl Visitor for FunctionStatementReturns { - fn visit_component_definition(&mut self, _h: &mut Heap, _def: ComponentId) -> VisitorResult { + fn visit_component_definition(&mut self, _h: &mut Heap, _def: ComponentDefinitionId) -> VisitorResult { Ok(()) } fn visit_variable_declaration(&mut self, _h: &mut 
Heap, _decl: VariableId) -> VisitorResult { @@ -698,14 +701,14 @@ impl ComponentStatementReturnNew { } impl Visitor for ComponentStatementReturnNew { - fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { assert!(!(self.illegal_new || self.illegal_return)); self.illegal_return = true; recursive_component_definition(self, h, def)?; self.illegal_return = false; Ok(()) } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { assert!(!self.illegal_new); self.illegal_new = true; recursive_primitive_definition(self, h, def)?; @@ -875,13 +878,15 @@ impl Visitor for LinkStatements { let end_if_id = end_if_id.unwrap(); assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].true_body)?; + self.visit_block_statement(h, h[stmt].true_body)?; if let Some(UniqueStatementId(prev)) = self.prev.take() { h[prev].link_next(end_if_id.upcast()); } assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].false_body)?; + if let Some(false_body) = h[stmt].false_body { + self.visit_block_statement(h, false_body)?; + } if let Some(UniqueStatementId(prev)) = self.prev.take() { h[prev].link_next(end_if_id.upcast()); } @@ -903,7 +908,7 @@ impl Visitor for LinkStatements { // let end_while_id = end_while_id.unwrap(); assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].body)?; + self.visit_block_statement(h, h[stmt].body)?; // The body's next statement loops back to the while statement itself // Note: continue statements also loop back to the while statement itself if let Some(UniqueStatementId(prev)) = self.prev.take() { @@ -941,7 +946,7 @@ impl Visitor for LinkStatements { let end_sync_id = end_sync_id.unwrap(); assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].body)?; + self.visit_block_statement(h, h[stmt].body)?; // The body's next statement points to the pseudo element if let Some(UniqueStatementId(prev)) = self.prev.take() { h[prev].link_next(end_sync_id.upcast()); diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index b4255992c152a1ac0e99557098a35cd88b71e6cb..478de8cde3f09f5784c21a1ce4bd2f60b92a5cfb 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -29,10 +29,6 @@ use crate::protocol::lexer::*; use std::collections::HashMap; use crate::protocol::ast_printer::ASTWriter; -pub(crate) const LIMIT_NUM_TYPE_NODES: usize = 64; -pub(crate) const LIMIT_NUM_POLY_VARS: usize = 64; -pub(crate) const LIMIT_NUM_PROC_ARGS: usize = 64; - #[derive(PartialEq, Eq)] pub enum ModuleCompilationPhase { Source, // only source is set @@ -40,6 +36,7 @@ pub enum ModuleCompilationPhase { SymbolsScanned, // all definitions are linked to their type class ImportsResolved, // all imports are added to the symbol table DefinitionsParsed, // produced the AST for the entire module + TypesParsed, // added all definitions to the type table ValidatedAndLinked, // AST is traversed and has linked the required AST nodes Typed, // Type inference and checking has been performed } diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index 93e2f6d84bae1012ea81696bad3018a5b21c3dbf..3fb0c35ea5128b0a6f661c8a50680d2d0042ee26 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -16,8 +16,9 @@ pub(crate) 
struct PassDefinitions { struct_fields: Vec, enum_variants: Vec, union_variants: Vec, - parameters: Vec, + parameters: ScopedBuffer, expressions: ScopedBuffer, + statements: ScopedBuffer, parser_types: Vec, } @@ -28,17 +29,22 @@ impl PassDefinitions { debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved); debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); - // TODO: Very important to go through ALL ranges of the module so that we parse the entire - // input source. Only skip the ones we're certain we've handled before. + // Although we only need to parse the definitions, we want to go through + // code ranges as well such that we can throw errors if we get + // unexpected tokens at the module level of the source. let mut range_idx = module_range.first_child_idx; loop { let range_idx_usize = range_idx as usize; let cur_range = &module.tokens.ranges[range_idx_usize]; - if cur_range.range_kind == TokenRangeKind::Definition { - self.visit_definition_range(modules, module_idx, ctx, range_idx_usize)?; + match cur_range.range_kind { + TokenRangeKind::Module => unreachable!(), // should not be reachable + TokenRangeKind::Pragma | TokenRangeKind::Import => continue, // already fully parsed + TokenRangeKind::Definition | TokenRangeKind::Code => {} } + self.visit_range(modules, module_idx, ctx, range_idx_usize)?; + match cur_range.next_sibling_idx { Some(idx) => { range_idx = idx; }, None => { break; }, @@ -50,39 +56,34 @@ impl PassDefinitions { Ok(()) } - fn visit_definition_range( + fn visit_range( &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize ) -> Result<(), ParseError> { let module = &modules[module_idx]; let cur_range = &module.tokens.ranges[range_idx]; - debug_assert_eq!(cur_range.range_kind, TokenRangeKind::Definition); + debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code); // Detect which definition we're parsing let mut iter = module.tokens.iter_range(cur_range); - let keyword = peek_ident(&module.source, &mut iter).unwrap(); - match keyword { - KW_STRUCT => { - - }, - KW_ENUM => { - - }, - KW_UNION => { - - }, - KW_FUNCTION => { - - }, - KW_PRIMITIVE => { - - }, - KW_COMPOSITE => { - - }, - _ => unreachable!("encountered keyword '{}' in definition range", String::from_utf8_lossy(keyword)), - }; + loop { + let next = iter.next(); + if next.is_none() { + return Ok(()) + } - Ok(()) + // Token was not None, so peek_ident returns None if not an ident + let ident = peek_ident(&module.source, &mut iter); + match ident { + Some(KW_STRUCT) => self.visit_struct_definition(module, &mut iter, ctx)?, + Some(KW_ENUM) => self.visit_enum_definition(module, &mut iter, ctx)?, + Some(KW_FUNCTION) => self.visit_function_definition(module, &mut iter, ctx)?, + Some(KW_PRIMITIVE) | Some(KW_COMPOSITE) => self.visit_component_definition(module, &mut iter, ctx)?, + _ => return Err(ParseError::new_error_str_at_pos( + &module.source, iter.last_valid_pos(), + "unexpected symbol, expected some kind of type or procedure definition" + )), + } + } } fn visit_struct_definition( @@ -231,67 +232,187 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); // Parse function's argument list + let mut parameter_section = self.parameters.start_section(); consume_parameter_list( - source, iter, ctx, &mut self.parameters, poly_vars, module_scope, definition_id + source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id )?; - let parameters = 
self.parameters.clone(); - self.parameters.clear(); + let parameters = parameter_section.into_vec(); // Consume return types consume_token(&module.source, iter, TokenKind::ArrowRight)?; + let mut open_curly_pos = iter.last_valid_pos(); consume_comma_separated_until( TokenKind::OpenCurly, &module.source, iter, |source, iter| { - consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false) + consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false, 0) }, - &mut self.parser_types, "a return type", None + &mut self.parser_types, "a return type", Some(&mut open_curly_pos) )?; let return_types = self.parser_types.clone(); - self.parser_types.clear(); + + // TODO: @ReturnValues + match return_types.len() { + 0 => return Err(ParseError::new_error_str_at_pos(&module.source, open_curly_pos, "expected a return type")), + 1 => {}, + _ => return Err(ParseError::new_error_str_at_pos(&module.source, open_curly_pos, "multiple return types are not (yet) allowed")), + } // Consume block + let body = self.consume_block_statement_without_leading_curly(module, iter, ctx, open_curly_pos)?; + + // Assign everything in the preallocated AST node + let function = ctx.heap[definition_id].as_function_mut(); + function.return_types = return_types; + function.parameters = parameters; + function.body = body; + + Ok(()) + } + + fn visit_component_definition( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result<(), ParseError> { + let (_variant_text, _) = consume_any_ident(&module.source, iter)?; + debug_assert!(variant_text == KW_PRIMITIVE || variant_text == KW_COMPOSITE); + let (ident_text, _) = consume_ident(&module.source, iter)?; + + // Retrieve preallocated definition + let module_scope = SymbolScope::Module(module.root_id); + let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) + .unwrap().variant.as_definition().definition_id; + let poly_vars = ctx.heap[definition_id].poly_vars(); + + // Parse component's argument list + let mut parameter_section = self.parameters.start_section(); + consume_parameter_list( + source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id + )?; + let parameters = parameter_section.into_vec(); + + // Consume block + let body = self.consume_block_statement(module, iter, ctx)?; + + // Assign everything in the AST node + let component = ctx.heap[definition_id].as_component_mut(); + component.parameters = parameters; + component.body = body; + + Ok(()) + } + + /// Consumes a block statement. If the resulting statement is not a block + /// (e.g. 
for a shorthand "if (expr) single_statement") then it will be + /// wrapped in one + fn consume_block_or_wrapped_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + if Some(TokenKind::OpenCurly) == iter.next() { + // This is a block statement + self.consume_block_statement(module, iter, ctx) + } else { + // Not a block statement, so wrap it in one + let mut statements = self.statements.start_section(); + let wrap_begin_pos = iter.last_valid_pos(); + self.consume_statement(module, iter, ctx, &mut statements)?; + let wrap_end_pos = iter.last_valid_pos(); + + debug_assert_eq!(statements.len(), 1); + let statements = statements.into_vec(); + + ctx.heap.alloc_block_statement(|this| BlockStatement{ + this, + is_implicit: true, + span: InputSpan::from_positions(wrap_begin_pos, wrap_end_pos), // TODO: @Span + statements, + parent_scope: None, + relative_pos_in_parent: 0, + locals: Vec::new(), + labels: Vec::new() + }) + } } /// Consumes a statement and returns a boolean indicating whether it was a /// block or not. fn consume_statement( - &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx - ) -> Result<(StatementId, bool), ParseError> { + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, section: &mut ScopedSection + ) -> Result<(), ParseError> { let next = iter.next().expect("consume_statement has a next token"); - let mut was_block = false; - let statement = if next == TokenKind::OpenCurly { - was_block = true; - self.consume_block_statement(module, iter, ctx)?.upcast() + if next == TokenKind::OpenCurly { + let id = self.consume_block_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if next == TokenKind::Ident { let (ident, _) = consume_any_ident(source, iter)?; if ident == KW_STMT_IF { - self.consume_if_statement(module, iter, ctx)? + // Consume if statement and place end-if statement directly + // after it. + let id = self.consume_if_statement(module, iter, ctx)?; + section.push(id.upcast()); + + let end_if = ctx.heap.alloc_end_if_statement(|this| EndIfStatement{ + this, start_if: id, next: None + }); + section.push(id.upcast()); + + let if_stmt = &mut ctx.heap[id]; + if_stmt.end_if = Some(end_if); } else if ident == KW_STMT_WHILE { - self.consume_while_statement(module, iter, ctx)? + let id = self.consume_while_statement(module, iter, ctx)?; + section.push(id.upcast()); + + let end_while = ctx.heap.alloc_end_while_statement(|this| EndWhileStatement{ + this, start_while: id, next: None + }); + section.push(id.upcast()); + + let while_stmt = &mut ctx.heap[id]; + while_stmt.end_while = Some(end_while); } else if ident == KW_STMT_BREAK { - self.consume_break_statement(module, iter, ctx)? + let id = self.consume_break_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if ident == KW_STMT_CONTINUE { - self.consume_continue_statement(module, iter, ctx)? + let id = self.consume_continue_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if ident == KW_STMT_SYNC { - self.consume_synchronous_statement(module, iter, ctx)? + let id = self.consume_synchronous_statement(module, iter, ctx)?; + section.push(id.upcast()); + + let end_sync = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement{ + this, start_sync: id, next: None + }); + + let sync_stmt = &mut ctx.heap[id]; + sync_stmt.end_sync = Some(end_sync); } else if ident == KW_STMT_RETURN { - self.consume_return_statement(module, iter, ctx)? 
+ let id = self.consume_return_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if ident == KW_STMT_GOTO { - self.consume_goto_statement(module, iter, ctx)? + let id = self.consume_goto_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if ident == KW_STMT_NEW { - self.consume_new_statement(module, iter, ctx)? + let id = self.consume_new_statement(module, iter, ctx)?; + section.push(id.upcast()); } else if ident == KW_STMT_CHANNEL { - self.consume_channel_statement(module, iter, ctx)? + let id = self.consume_channel_statement(module, iter, ctx)?; + section.push(id.upcast().upcast()); } else if iter.peek() == Some(TokenKind::Colon) { - self.consume_labeled_statement(module, iter, ctx)? + self.consume_labeled_statement(module, iter, ctx, section)?; } else { - // Attempt to parse as expression - self.consume_expression_statement(module, iter, ctx)? + // Two fallback possibilities: the first one is a memory + // declaration, the other one is to parse it as a regular + // expression. This is a bit ugly + if let Some((memory_stmt_id, assignment_stmt_id)) = self.maybe_consume_memory_statement(module, iter, ctx)? { + section.push(memory_stmt_id.upcast().upcast()); + section.push(assignment_stmt_id.upcast()); + } else { + let id = self.consume_expression_statement(module, iter, ctx)?; + section.push(id.upcast()); + } } }; - return Ok((statement, was_block)); + return Ok(()); } fn consume_block_statement( @@ -332,13 +453,12 @@ impl PassDefinitions { consume_token(&module.source, iter, TokenKind::OpenParen)?; let test = self.consume_expression(module, iter, ctx)?; consume_token(&module.source, iter, TokenKind::CloseParen)?; - let (true_body, was_block) = self.consume_statement(module, iter, ctx)?; - let true_body = Self::wrap_in_block(ctx, true_body, was_block); + let true_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; let false_body = if has_ident(source, iter, KW_STMT_ELSE) { iter.consume(); - let (false_body, was_block) = self.consume_statement(module, iter, ctx)?; - Some(Self::wrap_in_block(ctx, false_body, was_block)) + let false_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; + Some(false_body) } else { None }; @@ -360,8 +480,7 @@ impl PassDefinitions { consume_token(&module.source, iter, TokenKind::OpenParen)?; let test = self.consume_expression(module, iter, ctx)?; consume_token(&module.source, iter, TokenKind::CloseParen)?; - let (body, was_block) = self.consume_statement(module, iter, ctx)?; - let body = Self::wrap_in_block(ctx, body, was_block); + let body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; Ok(ctx.heap.alloc_while_statement(|this| WhileStatement{ this, @@ -415,8 +534,8 @@ impl PassDefinitions { &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result { let synchronous_span = consume_exact_ident(&module.source, iter, KW_STMT_SYNC)?; - let (body, was_block) = self.consume_statement(module, iter, ctx)?; - let body = Self::wrap_in_block(ctx, body, was_block); + let body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; + Ok(ctx.heap.alloc_synchronous_statement(|this| SynchronousStatement{ this, span: synchronous_span, @@ -553,17 +672,118 @@ impl PassDefinitions { } fn consume_labeled_statement( - &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx - ) -> Result { + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, section: &mut ScopedSection + ) -> Result<(), ParseError> { let label = consume_ident_interned(&module.source, 
iter, ctx)?; consume_token(&module.source, iter, TokenKind::Colon)?; - let (body, _) = self.consume_statement(module, iter, ctx)?; - Ok(ctx.heap.alloc_labeled_statement(|this| LabeledStatement{ - this, label, body, + // Not pretty: consume_statement may produce more than one statement. + // The values in the section need to be in the correct order if some + // kind of outer block is consumed, so we take another section, push + // the expressions in that one, and then allocate the labeled statement. + let mut inner_section = self.statements.start_section(); + self.consume_statement(module, iter, ctx, &mut inner_section)?; + debug_assert!(inner_section.len() >= 1); + + let stmt_id = ctx.heap.alloc_labeled_statement(|this| LabeledStatement { + this, + label, + body: *inner_section[0], relative_pos_in_block: 0, - in_sync: None - })) + in_sync: None, + }); + + if inner_section.len() == 1 { + // Produce the labeled statement pointing to the first statement. + // This is by far the most common case. + inner_section.forget(); + section.push(stmt_id.upcast()); + } else { + // Produce the labeled statement using the first statement, and push + // the remaining ones at the end. + let inner_statements = inner_section.into_vec(); + section.push(stmt_id.upcast()); + for idx in 1..inner_statements.len() { + section.push(inner_statements[idx]) + } + } + + Ok(()) + } + + fn maybe_consume_memory_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result, ParseError> { + // This is a bit ugly. It would be nicer if we could somehow + // consume the expression with a type hint if we do get a valid + // type, but we don't get an identifier following it + let iter_state = iter.save(); + let definition_id = self.cur_definition; + let poly_vars = ctx.heap[definition_id].poly_vars(); + + let parser_type = consume_parser_type( + &module.source, iter, &ctx.symbols, &ctx.heap, poly_vars, + SymbolScope::Definition(definition_id), definition_id, true, 0 + ); + + if let Ok(parser_type) = parser_type { + if Some(TokenKind::Ident) == iter.next() { + // Assume this is a proper memory statement + let identifier = consume_ident_interned(&module.source, iter, ctx)?; + let memory_span = InputSpan::from_positions(parser_type.elements[0].full_span.begin, identifier.span.end); + let assign_span = consume_token(&module.source, iter, TokenKind::Equal)?; + + let initial_expr_begin_pos = iter.last_valid_pos(); + let initial_expr_id = self.consume_expression(module, iter, ctx)?; + let initial_expr_end_pos = iter.last_valid_pos(); + consume_token(&module.source, iter, TokenKind::SemiColon)?; + + // Allocate the memory statement with the variable + let local_id = ctx.heap.alloc_local(|this| Local{ + this, + identifier: identifier.clone(), + parser_type, + relative_pos_in_block: 0, + }); + let memory_stmt_id = ctx.heap.alloc_memory_statement(|this| MemoryStatement{ + this, + span: memory_span, + variable: local_id, + next: None + }); + + // Allocate the initial assignment + let variable_expr_id = ctx.heap.alloc_variable_expression(|this| VariableExpression{ + this, + identifier, + declaration: None, + parent: ExpressionParent::None, + concrete_type: Default::default() + }); + let assignment_expr_id = ctx.heap.alloc_assignment_expression(|this| AssignmentExpression{ + this, + span: assign_span, + left: variable_expr_id.upcast(), + operation: AssignmentOperator::Set, + right: initial_expr_id, + parent: ExpressionParent::None, + concrete_type: Default::default(), + }); + let assignment_stmt_id = 
ctx.heap.alloc_expression_statement(|this| ExpressionStatement{ + this, + span: InputSpan::from_positions(initial_expr_begin_pos, initial_expr_end_pos), + expression: assignment_expr_id.upcast(), + next: None, + }); + + return Ok(Some((memory_stmt_id, assignment_stmt_id))) + } + } + + // If here then one of the preconditions for a memory statement was not + // met. So recover the iterator and return + iter.load(iter_state); + Ok(None) } fn consume_expression_statement( @@ -1226,24 +1446,6 @@ impl PassDefinitions { )?; Ok(section.into_vec()) } - - fn wrap_in_block(ctx: &mut PassCtx, statement: StatementId, was_block: bool) -> BlockStatementId { - debug_assert_eq!(was_block, ctx.heap[statement].is_block()); - if was_block { - return BlockStatementId(StatementId::new(statement.index)); // Yucky - } - - ctx.heap.alloc_block_statement(|this| BlockStatement{ - this, - is_implicit: true, - span: ctx.heap[statement].span(), - statements: vec![statement], - parent_scope: None, - relative_pos_in_parent: 0, - locals: Vec::new(), - labels: Vec::new(), - }) - } } /// Consumes a type. A type always starts with an identifier which may indicate @@ -1598,7 +1800,7 @@ fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter) /// Consumes the parameter list to functions/components fn consume_parameter_list( - source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut Vec, + source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut ScopedSection, poly_vars: &[Identifier], scope: SymbolScope, definition_id: DefinitionId ) -> Result<(), ParseError> { consume_comma_separated( diff --git a/src/protocol/parser/pass_imports.rs b/src/protocol/parser/pass_imports.rs index 388d6a3f33119cf93551e89096e2eff8dd65a86c..9e76cabeb22181a21e7bfa30779446ea8ce86c05 100644 --- a/src/protocol/parser/pass_imports.rs +++ b/src/protocol/parser/pass_imports.rs @@ -181,14 +181,14 @@ impl PassImport { } } else if Some(TokenKind::OpenCurly) = next { // Importing multiple symbols + let mut end_of_list = iter.last_valid_pos(); consume_comma_separated( TokenKind::OpenCurly, TokenKind::CloseCurly, source, &mut iter, |source, iter| consume_symbol_and_maybe_alias( source, iter, ctx, &module_identifier.value, target_root_id ), - &mut self.found_symbols, "a symbol", "a list of symbols to import" + &mut self.found_symbols, "a symbol", "a list of symbols to import", Some(&mut end_of_list) )?; - let end_of_list = iter.last_valid_pos(); // Preallocate import import_id = ctx.heap.alloc_import(|this| Import::Symbols(ImportSymbols { diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs index 94ee54a7c0c228d208db3434750b56b7351c7f4b..3210f1b578465bbc046d758b649bae82aa6c3cb5 100644 --- a/src/protocol/parser/pass_symbols.rs +++ b/src/protocol/parser/pass_symbols.rs @@ -185,7 +185,7 @@ impl PassSymbols { maybe_consume_comma_separated( TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, |source, iter| consume_ident_interned(source, iter, ctx), - &mut poly_vars, "a polymorphic variable" + &mut poly_vars, "a polymorphic variable", None )?; let ident_text = identifier.value.clone(); // because we need it later diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs index 9242f9aaccb1348fb7cc46722ed977d0e023e690..b6b5668e65e700ec1ddb62d4861e57154d3de566 100644 --- a/src/protocol/parser/symbol_table2.rs +++ b/src/protocol/parser/symbol_table2.rs @@ -128,6 +128,20 @@ pub enum SymbolVariant { } impl SymbolVariant { + /// 
Returns the span at which the item was introduced. For an imported + /// item (all modules, and imported types) this returns the span of the + /// import. For a defined type this returns the span of the identifier + pub(crate) fn span_of_introduction(&self, heap: &Heap) -> InputSpan { + match self { + SymbolVariant::Module(v) => heap[v.introduced_at].span(), + SymbolVariant::Definition(v) => if let Some(import_id) = v.imported_at { + heap[import_id].span() + } else { + v.identifier_span + }, + } + } + pub(crate) fn as_module(&self) -> &SymbolModule { match self { SymbolVariant::Module(v) => v, diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs index 57c8c5d8f2a1a1ebf42033a780eb73cb5bad06d4..a3bbf37c46dbda95f6bbe3c037c16368639b61e3 100644 --- a/src/protocol/parser/tokens.rs +++ b/src/protocol/parser/tokens.rs @@ -329,4 +329,15 @@ impl<'a> TokenIter<'a> { } } } + + /// Saves the current iteration position, may be passed to `load` to return + /// the iterator to a previous position. + pub(crate) fn save(&self) -> (usize, usize) { + (self.cur, self.end) + } + + pub(crate) fn load(&mut self, saved: (usize, usize)) { + self.cur = saved.0; + self.end = saved.1; + } } \ No newline at end of file diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index 4f8b078c61061da64096c3d2445f6647087142a6..cf5bf9b26476f407d1c90b3405a171e54e0f0e90 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -1,62 +1,9 @@ -/** -TypeTable - -Contains the type table: a datastructure that, when compilation succeeds, -contains a concrete type definition for each AST type definition. In general -terms the type table will go through the following phases during the compilation -process: - -1. The base type definitions are resolved after the parser phase has - finished. This implies that the AST is fully constructed, but not yet - annotated. -2. With the base type definitions resolved, the validation/linker phase will - use the type table (together with the symbol table) to disambiguate - terms (e.g. does an expression refer to a variable, an enum, a constant, - etc.) -3. During the type checking/inference phase the type table is used to ensure - that the AST contains valid use of types in expressions and statements. - At the same time type inference will find concrete instantiations of - polymorphic types, these will be stored in the type table as monomorphed - instantiations of a generic type. -4. After type checking and inference (and possibly when constructing byte - code) the type table will construct a type graph and solidify each - non-polymorphic type and monomorphed instantiations of polymorphic types - into concrete types. - -So a base type is defined by its (optionally polymorphic) representation in the -AST. A concrete type has concrete types for each of the polymorphic arguments. A -struct, enum or union may have polymorphic arguments but not actually be a -polymorphic type. This happens when the polymorphic arguments are not used in -the type definition itself. Similarly for functions/components: but here we just -check the arguments/return type of the signature. - -Apart from base types and concrete types, we also use the term "embedded type" -for types that are embedded within another type, such as a type of a struct -struct field or of a union variant. Embedded types may themselves have -polymorphic arguments and therefore form an embedded type tree. 
- -NOTE: for now a polymorphic definition of a function/component is illegal if the - polymorphic arguments are not used in the arguments/return type. It should - be legal, but we disallow it for now. - -TODO: Allow potentially cyclic datatypes and reject truly cyclic datatypes. -TODO: Allow for the full potential of polymorphism -TODO: Detect "true" polymorphism: for datatypes like structs/enum/unions this - is simple. For functions we need to check the entire body. Do it here? Or - do it somewhere else? -TODO: Do we want to check fn argument collision here, or in validation phase? -TODO: Make type table an on-demand thing instead of constructing all base types. -TODO: Cleanup everything, feels like a lot can be written cleaner and with less - assumptions on each function call. -// TODO: Review all comments -*/ - use std::fmt::{Formatter, Result as FmtResult}; use std::collections::{HashMap, VecDeque}; use crate::protocol::ast::*; -use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; -use crate::protocol::inputsource::*; +use crate::protocol::parser::symbol_table2::{SymbolTable, Symbol, SymbolScope}; +use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError}; use crate::protocol::parser::*; //------------------------------------------------------------------------------ @@ -109,7 +56,7 @@ pub struct DefinedType { pub(crate) ast_root: RootId, pub(crate) ast_definition: DefinitionId, pub(crate) definition: DefinedTypeVariant, - pub(crate) poly_vars: Vec, + pub(crate) poly_vars: Vec, pub(crate) is_polymorph: bool, pub(crate) is_pointerlike: bool, // TODO: @optimize @@ -144,13 +91,6 @@ pub enum DefinedTypeVariant { Component(ComponentType) } -pub struct PolyVar { - identifier: Identifier, - /// Whether the polymorphic variables is used directly in the definition of - /// the type (not including bodies of function/component types) - is_in_use: bool, -} - impl DefinedTypeVariant { pub(crate) fn type_class(&self) -> TypeClass { match self { @@ -184,6 +124,11 @@ impl DefinedTypeVariant { } } +struct PolymorphicVariable { + identifier: Identifier, + is_in_use: bool, // a polymorphic argument may be defined, but not used by the type definition +} + /// `EnumType` is the classical C/C++ enum type. It has various variants with /// an assigned integer value. The integer values may be user-defined, /// compiler-defined, or a mix of the two. If a user assigns the same enum @@ -210,7 +155,7 @@ pub struct UnionType { pub struct UnionVariant { pub(crate) identifier: Identifier, - pub(crate) embedded: Vec, // zero-length does not have embedded values + pub(crate) embedded: Vec, // zero-length does not have embedded values pub(crate) tag_value: i64, } @@ -220,11 +165,11 @@ pub struct StructType { pub struct StructField { pub(crate) identifier: Identifier, - pub(crate) parser_type: ParserTypeId, + pub(crate) parser_type: ParserType, } pub struct FunctionType { - pub return_type: ParserTypeId, + pub return_types: Vec, pub arguments: Vec } @@ -235,7 +180,7 @@ pub struct ComponentType { pub struct FunctionArgument { identifier: Identifier, - parser_type: ParserTypeId, + parser_type: ParserType, } //------------------------------------------------------------------------------ @@ -282,20 +227,14 @@ impl TypeIterator { /// Result from attempting to resolve a `ParserType` using the symbol table and /// the type table. 
enum ResolveResult { - /// ParserType is a builtin type - BuiltIn, - /// ParserType points to a polymorphic argument, contains the index of the - /// polymorphic argument in the outermost definition (e.g. we may have - /// structs nested three levels deep, but in the innermost struct we can - /// only use the polyargs that are specified in the type definition of the - /// outermost struct). - PolyArg(usize), + Builtin, + PolymoprhicArgument, /// ParserType points to a user-defined type that is already resolved in the /// type table. - Resolved((RootId, DefinitionId)), + Resolved(RootId, DefinitionId), /// ParserType points to a user-defined type that is not yet resolved into /// the type table. - Unresolved((RootId, DefinitionId)) + Unresolved(RootId, DefinitionId) } pub(crate) struct TypeTable { @@ -313,11 +252,11 @@ pub(crate) struct TypeTable { pub(crate) struct TypeCtx<'a> { symbols: &'a SymbolTable, heap: &'a mut Heap, - modules: &'a [LexedModule] + modules: &'a [Module] } impl<'a> TypeCtx<'a> { - pub(crate) fn new(symbols: &'a SymbolTable, heap: &'a mut Heap, modules: &'a [LexedModule]) -> Self { + pub(crate) fn new(symbols: &'a SymbolTable, heap: &'a mut Heap, modules: &'a [Module]) -> Self { Self{ symbols, heap, modules } } } @@ -348,7 +287,6 @@ impl TypeTable { let reserve_size = ctx.heap.definitions.len(); self.lookup.reserve(reserve_size); - // TODO: @cleanup Rework this hack for root_idx in 0..ctx.modules.len() { let last_definition_idx = ctx.heap[ctx.modules[root_idx].root_id].definitions.len(); for definition_idx in 0..last_definition_idx { @@ -465,11 +403,13 @@ impl TypeTable { self.check_identifier_collision( ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" )?; + + // Because we're parsing an enum, the programmer cannot put the + // polymorphic variables inside the variants. But the polymorphic + // variables might still be present as "marker types" self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + let poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); - // Note: although we cannot have embedded type dependent on the - // polymorphic variables, they might still be present as tokens - let definition_id = definition.this.upcast(); self.lookup.insert(definition_id, DefinedType { ast_root: root_id, ast_definition: definition_id, @@ -477,7 +417,7 @@ impl TypeTable { variants, representation: Self::enum_tag_type(min_enum_value, max_enum_value) }), - poly_vars: self.create_initial_poly_vars(&definition.poly_vars), + poly_vars, is_polymorph: false, is_pointerlike: false, monomorphs: Vec::new() @@ -501,8 +441,8 @@ impl TypeTable { match &variant.value { UnionVariantValue::None => {}, UnionVariantValue::Embedded(embedded) => { - for embedded_id in embedded { - let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *embedded_id)?; + for parser_type in embedded { + let resolve_result = self.resolve_base_parser_type(ctx, root_id, parser_type)?; if !self.ingest_resolve_result(ctx, resolve_result)? 
                     { return Ok(false) }
@@ -539,10 +479,11 @@ impl TypeTable {
         )?;
         self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?;
-        let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars);
+        // Construct polymorphic variables and mark the ones that are in use
+        let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars);
         for variant in &variants {
-            for embedded_id in &variant.embedded {
-                self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, *embedded_id)?;
+            for parser_type in &variant.embedded {
+                Self::mark_used_polymorphic_variables(&mut poly_vars, parser_type);
             }
         }
-        let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use);
+        let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use);
@@ -575,7 +516,7 @@ impl TypeTable {
         // Make sure all fields point to resolvable types
         for field_definition in &definition.fields {
-            let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, field_definition.parser_type)?;
+            let resolve_result = self.resolve_base_parser_type(ctx, root_id, &field_definition.parser_type)?;
             if !self.ingest_resolve_result(ctx, resolve_result)?
                 { return Ok(false) }
@@ -586,7 +527,7 @@ impl TypeTable {
         for field_definition in &definition.fields {
             fields.push(StructField{
                 identifier: field_definition.field.clone(),
-                parser_type: field_definition.parser_type,
+                parser_type: field_definition.parser_type.clone(),
             })
         }
@@ -597,9 +538,9 @@ impl TypeTable {
         self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?;
         // Construct representation of polymorphic arguments
-        let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars);
+        let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars);
         for field in &fields {
-            self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, field.parser_type)?;
+            Self::mark_used_polymorphic_variables(&mut poly_vars, &field.parser_type);
         }
-        let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use);
+        let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use);
@@ -627,12 +568,10 @@ impl TypeTable {
         debug_assert!(!self.lookup.contains_key(&definition_id), "base function already resolved");
         let definition = ctx.heap[definition_id].as_function();
-        let return_type = definition.return_type;
         // Check the return type
-        let resolve_result = self.resolve_base_parser_type(
-            ctx, &definition.poly_vars, root_id, definition.return_type
-        )?;
+        debug_assert_eq!(definition.return_types.len(), 1, "not one return type"); // TODO: @ReturnValues
+        let resolve_result = self.resolve_base_parser_type(ctx, root_id, &definition.return_types[0])?;
         if !self.ingest_resolve_result(ctx, resolve_result)?
             { return Ok(false) }
@@ -640,9 +579,7 @@ impl TypeTable {
         // Check the argument types
         for param_id in &definition.parameters {
             let param = &ctx.heap[*param_id];
-            let resolve_result = self.resolve_base_parser_type(
-                ctx, &definition.poly_vars, root_id, param.parser_type
-            )?;
+            let resolve_result = self.resolve_base_parser_type(ctx, root_id, &param.parser_type)?;
             if !self.ingest_resolve_result(ctx, resolve_result)?
                 { return Ok(false) }
@@ -654,7 +591,7 @@ impl TypeTable {
             let param = &ctx.heap[*param_id];
             arguments.push(FunctionArgument{
                 identifier: param.identifier.clone(),
-                parser_type: param.parser_type,
+                parser_type: param.parser_type.clone(),
             })
         }
@@ -665,13 +602,11 @@ impl TypeTable {
         self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?;
         // Construct polymorphic arguments
-        let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars);
-        let return_type_id = definition.return_type;
-        self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, return_type_id)?;
+        let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars);
+        Self::mark_used_polymorphic_variables(&mut poly_vars, &definition.return_types[0]);
         for argument in &arguments {
-            self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, argument.parser_type)?;
+            Self::mark_used_polymorphic_variables(&mut poly_vars, &argument.parser_type);
         }
-        let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use);
+        let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use);
         // Construct entry in type table
@@ -679,7 +614,7 @@ impl TypeTable {
             ast_root: root_id,
             ast_definition: definition_id,
             definition: DefinedTypeVariant::Function(FunctionType{
-                return_type,
+                return_types: definition.return_types.clone(),
                 arguments,
             }),
-            poly_vars: poly_args,
+            poly_vars,
             is_polymorph,
             is_pointerlike: false,
             monomorphs: Vec::new()
@@ -704,9 +639,7 @@ impl TypeTable {
         // Check argument types
         for param_id in &definition.parameters {
             let param = &ctx.heap[*param_id];
-            let resolve_result = self.resolve_base_parser_type(
-                ctx, &definition.poly_vars, root_id, param.parser_type
-            )?;
+            let resolve_result = self.resolve_base_parser_type(ctx, root_id, &param.parser_type)?;
             if !self.ingest_resolve_result(ctx, resolve_result)?
                 { return Ok(false) }
@@ -718,7 +651,7 @@ impl TypeTable {
             let param = &ctx.heap[*param_id];
             arguments.push(FunctionArgument{
                 identifier: param.identifier.clone(),
-                parser_type: param.parser_type
+                parser_type: param.parser_type.clone()
             })
         }
@@ -729,12 +662,12 @@ impl TypeTable {
         self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?;
         // Construct polymorphic arguments
-        let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars);
+        let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars);
         for argument in &arguments {
-            self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, argument.parser_type)?;
+            Self::mark_used_polymorphic_variables(&mut poly_vars, &argument.parser_type);
        }
-        let is_polymorph = poly_args.iter().any(|v| v.is_in_use);
+        let is_polymorph = poly_vars.iter().any(|v| v.is_in_use);
         // Construct entry in type table
         self.lookup.insert(definition_id, DefinedType{
@@ -744,7 +677,7 @@ impl TypeTable {
                 variant: component_variant,
                 arguments,
             }),
-            poly_vars: poly_args,
+            poly_vars,
             is_polymorph,
             is_pointerlike: false, // TODO: @cyclic
             monomorphs: Vec::new(),
@@ -760,14 +693,15 @@ impl TypeTable {
     /// that the type must be resolved first.
fn ingest_resolve_result(&mut self, ctx: &TypeCtx, result: ResolveResult) -> Result { match result { - ResolveResult::BuiltIn | ResolveResult::PolyArg(_) => Ok(true), - ResolveResult::Resolved(_) => Ok(true), - ResolveResult::Unresolved((root_id, definition_id)) => { + ResolveResult::Builtin | ResolveResult::PolymoprhicArgument => Ok(true), + ResolveResult::Resolved(_, _) => Ok(true), + ResolveResult::Unresolved(root_id, definition_id) => { if self.iter.contains(root_id, definition_id) { // Cyclic dependency encountered // TODO: Allow this - let mut error = ParseError::new_error( - &ctx.modules[root_id.index as usize].source, ctx.heap[definition_id].position(), + let module_source = &ctx.modules[root_id.index as usize].source; + let mut error = ParseError::new_error_str_at_span( + module_source, ctx.heap[definition_id].identifier().span, "Evaluating this type definition results in a cyclic type" ); @@ -778,10 +712,8 @@ impl TypeTable { "Which depends on this definition" }; - error = error.with_postfixed_info( - &ctx.modules[root_id.index as usize].source, - ctx.heap[*definition_id].position(), msg - ); + let module_source = &ctx.modules[root_id.index as usize].source; + error = error.with_info_str_at_span(module_source, ctx.heap[*definition_id].identifier().span, msg); } Err(error) @@ -795,155 +727,58 @@ impl TypeTable { } } - /// Each type definition may consist of several embedded subtypes. This - /// function checks whether that embedded type is a builtin, a direct - /// reference to a polymorphic argument, or an (un)resolved type definition. - /// If the embedded type's symbol cannot be found then this function returns - /// an error. + /// Each type may consist of embedded types. If this type does not have a + /// fixed implementation (e.g. an input port may have an embedded type + /// indicating the type of messages, but it always exists in the runtime as + /// a port identifier, so it has a fixed implementation) then this function + /// will traverse the embedded types to ensure all of them are resolved. /// - /// If the embedded type is resolved, then one always receives the type's - /// (module, definition) tuple. If any of the types in the embedded type's - /// tree is not yet resolved, then one may receive a (module, definition) - /// tuple that does not correspond to the `parser_type_id` passed into this - /// function. - fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, poly_vars: &Vec, root_id: RootId, parser_type_id: ParserTypeId) -> Result { + /// Hence if one checks a particular parser type for being resolved, one may + /// get back a result value indicating an embedded type (with a different + /// DefinitionId) is unresolved. + fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, root_id: RootId, parser_type: &ParserType) -> Result { + // Note that as we iterate over the elements of a use ParserTypeVariant as PTV; - // Prepping iterator - self.parser_type_iter.clear(); - self.parser_type_iter.push_back(parser_type_id); - - // Result for the very first time we resolve a + // Result for the very first time we resolve a type (i.e the outer type + // that we're actually looking up) let mut resolve_result = None; let mut set_resolve_result = |v: ResolveResult| { if resolve_result.is_none() { resolve_result = Some(v); } }; - 'resolve_loop: while let Some(parser_type_id) = self.parser_type_iter.pop_back() { - let parser_type = &ctx.heap[parser_type_id]; - - match &parser_type.variant { - // Builtin types. 
An array is a builtin as it is implemented as a - // couple of pointers, so we do not require the subtype to be fully - // resolved. Similar for input/output ports. - PTV::Array(_) | PTV::Input(_) | PTV::Output(_) | PTV::Message | - PTV::Bool | PTV::Byte | PTV::Short | PTV::Int | PTV::Long | - PTV::String => { - set_resolve_result(ResolveResult::BuiltIn); + for element in parser_type.elements.iter() { + match element.variant { + PTV::Message | PTV::Bool | + PTV::UInt8 | PTV::UInt16 | PTV::UInt32 | PTV::UInt64 | + PTV::SInt8 | PTV::SInt16 | PTV::SInt32 | PTV::SInt64 | + PTV::Character | PTV::String | + PTV::Array | PTV::Input | PTV::Output => { + // Nothing to do: these are builtin types or types with a + // fixed implementation + set_resolve_result(ResolveResult::Builtin); + }, + PTV::IntegerLiteral | PTV::Inferred => { + // As we're parsing the type definitions where these kinds + // of types are impossible/disallowed to express: + unreachable!("illegal ParserTypeVariant within type definition"); }, - // IntegerLiteral types and the inferred marker are not allowed in - // definitions of types - PTV::IntegerLiteral | - PTV::Inferred => { - debug_assert!(false, "Encountered illegal ParserTypeVariant within type definition"); - unreachable!(); + PTV::PolymorphicArgument(_, _) => { + set_resolve_result(ResolveResult::PolymoprhicArgument); }, - // Symbolic type, make sure its base type, and the base types - // of all members of the embedded type tree are resolved. We - // don't care about monomorphs yet. - PTV::Symbolic(symbolic) => { - // Check if the symbolic type is one of the definition's - // polymorphic arguments. If so then we can halt the - // execution - for (poly_arg_idx, poly_arg) in poly_vars.iter().enumerate() { - if symbolic.identifier.matches_identifier(poly_arg) { - set_resolve_result(ResolveResult::PolyArg(poly_arg_idx)); - continue 'resolve_loop; - } - } - - // Lookup the definition in the symbol table - let (symbol, mut ident_iter) = ctx.symbols.resolve_namespaced_identifier(root_id, &symbolic.identifier); - if symbol.is_none() { - return Err(ParseError::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Could not resolve type" + PTV::Definition(embedded_id, _) => { + let definition = &ctx.heap[embedded_id]; + if !(definition.is_struct() || definition.is_enum() || definition.is_union()) { + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError::new_error_str_at_span( + module_source, element.full_span, "expected a datatype (struct, enum or union)" )) } - let symbol_value = symbol.unwrap(); - let module_source = &ctx.modules[root_id.index as usize].source; - - match symbol_value.symbol { - Symbol::Namespace(_) => { - // Reference to a namespace instead of a type - let last_ident = ident_iter.prev(); - return if ident_iter.num_remaining() == 0 { - // Could also have polymorphic args, but we - // don't care, just throw this error: - Err(ParseError::new_error( - module_source, symbolic.identifier.position, - "Expected a type, got a module name" - )) - } else if last_ident.is_some() && last_ident.map(|(_, poly_args)| poly_args.is_some()).unwrap() { - // Halted at a namespaced because we encountered - // polymorphic arguments - Err(ParseError::new_error( - module_source, symbolic.identifier.position, - "Illegal specification of polymorphic arguments to a module name" - )) - } else { - // Impossible (with the current implementation - // of the symbol table) - unreachable!( - "Got namespace symbol with {} 
returned symbols from {}", - ident_iter.num_returned(), - &String::from_utf8_lossy(&symbolic.identifier.value) - ); - } - }, - Symbol::Definition((root_id, definition_id)) => { - let definition = &ctx.heap[definition_id]; - if ident_iter.num_remaining() > 0 { - // Namespaced identifier is longer than the type - // we found. Return the appropriate message - return if definition.is_struct() || definition.is_enum() { - Err(ParseError::new_error( - module_source, symbolic.identifier.position, - &format!( - "Unknown type '{}', did you mean to use '{}'?", - String::from_utf8_lossy(&symbolic.identifier.value), - String::from_utf8_lossy(&definition.identifier().value) - ) - )) - } else { - Err(ParseError::new_error( - module_source, symbolic.identifier.position, - "Unknown datatype" - )) - } - } - - // Found a match, make sure it is a datatype - if !(definition.is_struct() || definition.is_enum() || definition.is_union()) { - return Err(ParseError::new_error( - module_source, symbolic.identifier.position, - "Embedded types must be datatypes (structs or enums)" - )) - } - - // Found a struct/enum definition - if !self.lookup.contains_key(&definition_id) { - // Type is not yet resoled, immediately return - // this - return Ok(ResolveResult::Unresolved((root_id, definition_id))); - } - - // Type is resolved, so set as result, but continue - // iterating over the parser types in the embedded - // type's tree - set_resolve_result(ResolveResult::Resolved((root_id, definition_id))); - - // Note: because we're resolving parser types, not - // embedded types, we're parsing a tree, so we can't - // get stuck in a cyclic loop. - let last_ident = ident_iter.prev(); - if let Some((_, Some(poly_args))) = last_ident { - for poly_arg_type_id in poly_args { - self.parser_type_iter.push_back(*poly_arg_type_id); - } - } - } + if self.lookup.contains_key(&embedded_id) { + set_resolve_result(ResolveResult::Resolved(definition.defined_in(), embedded_id)) + } else { + return Ok(ResolveResult::Unresolved(definition.defined_in(), embedded_id)) } } } @@ -954,127 +789,6 @@ impl TypeTable { return Ok(resolve_result.unwrap()) } - fn create_initial_poly_vars(&self, poly_args: &[Identifier]) -> Vec { - poly_args - .iter() - .map(|v| PolyVar{ identifier: v.clone(), is_in_use: false }) - .collect() - } - - /// This function modifies the passed `poly_args` by checking the embedded - /// type tree. This should be called after `resolve_base_parser_type` is - /// called on each node in this tree: we assume that each symbolic type was - /// resolved to either a polymorphic arg or a definition. - /// - /// This function will also make sure that if the embedded type has - /// polymorphic variables itself, that the number of polymorphic variables - /// matches the number of arguments in the associated definition. - /// - /// Finally, for all embedded types (which includes function/component - /// arguments and return types) in type definitions we will modify the AST - /// when the embedded type is a polymorphic variable or points to another - /// user-defined type. 
- fn check_and_resolve_embedded_type_and_modify_poly_args( - &mut self, ctx: &mut TypeCtx, - type_definition_id: DefinitionId, poly_args: &mut [PolyVar], - root_id: RootId, embedded_type_id: ParserTypeId, - ) -> Result<(), ParseError> { - use ParserTypeVariant as PTV; - - self.parser_type_iter.clear(); - self.parser_type_iter.push_back(embedded_type_id); - - 'type_loop: while let Some(embedded_type_id) = self.parser_type_iter.pop_back() { - let embedded_type = &mut ctx.heap[embedded_type_id]; - - match &mut embedded_type.variant { - PTV::Message | PTV::Bool | - PTV::Byte | PTV::Short | PTV::Int | PTV::Long | - PTV::String => { - // Builtins, no modification/iteration required - }, - PTV::IntegerLiteral | PTV::Inferred => { - // TODO: @hack Allowed for now so we can continue testing - // the parser/lexer - // debug_assert!(false, "encountered illegal parser type during ParserType/PolyArg modification"); - // unreachable!(); - }, - PTV::Array(subtype_id) | - PTV::Input(subtype_id) | - PTV::Output(subtype_id) => { - // Outer type is fixed, but inner type might be symbolic - self.parser_type_iter.push_back(*subtype_id); - }, - PTV::Symbolic(symbolic) => { - for (poly_arg_idx, poly_arg) in poly_args.iter_mut().enumerate() { - if symbolic.identifier.matches_identifier(&poly_arg.identifier) { - poly_arg.is_in_use = true; - // TODO: If we allow higher-kinded types in the future, - // then we can't continue here, but must resolve the - // polyargs as well - debug_assert!(symbolic.identifier.get_poly_args().is_none(), "got polymorphic arguments to a polymorphic variable"); - debug_assert!(symbolic.variant.is_none(), "symbolic parser type's variant already resolved"); - symbolic.variant = Some(SymbolicParserTypeVariant::PolyArg(type_definition_id, poly_arg_idx)); - continue 'type_loop; - } - } - - // Must match a definition - let (symbol, ident_iter) = ctx.symbols.resolve_namespaced_identifier(root_id, &symbolic.identifier); - debug_assert!(symbol.is_some(), "could not resolve symbolic parser type when determining poly args"); - let symbol = symbol.unwrap(); - debug_assert_eq!(ident_iter.num_remaining(), 0, "no exact symbol match when determining poly args"); - let (_root_id, definition_id) = symbol.as_definition().unwrap(); - - // Must be a struct, enum, or union, we checked this - let defined_type = self.lookup.get(&definition_id).unwrap(); - let (_, poly_args) = ident_iter.prev().unwrap(); - let poly_args = poly_args.unwrap_or_default(); - - if cfg!(debug_assertions) { - // Everything here should already be checked in - // `resolve_base_parser_type`. - let type_class = defined_type.definition.type_class(); - debug_assert!( - type_class == TypeClass::Struct || type_class == TypeClass::Enum || type_class == TypeClass::Union, - "embedded type's class is not struct, enum or union" - ); - debug_assert_eq!(poly_args.len(), symbolic.identifier.poly_args.len()); - } - - if poly_args.len() != defined_type.poly_vars.len() { - // Mismatch in number of polymorphic arguments. This is - // not allowed in type definitions (no inference is - // allowed within type definitions, only in bodies of - // functions/components). 
-                        let module_source = &ctx.modules[root_id.index as usize].source;
-                        let number_args_msg = if defined_type.poly_vars.is_empty() {
-                            String::from("is not polymorphic")
-                        } else {
-                            format!("accepts {} polymorphic arguments", defined_type.poly_vars.len())
-                        };
-
-                        return Err(ParseError::new_error(
-                            module_source, symbolic.identifier.position,
-                            &format!(
-                                "The type '{}' {}, but {} polymorphic arguments were provided",
-                                String::from_utf8_lossy(&symbolic.identifier.strip_poly_args()),
-                                number_args_msg, poly_args.len()
-                            )
-                        ));
-                    }
-
-                    self.parser_type_iter.extend(poly_args);
-                    debug_assert!(symbolic.variant.is_none(), "symbolic parser type's variant already resolved");
-                    symbolic.variant = Some(SymbolicParserTypeVariant::Definition(definition_id));
-                }
-            }
-        }
-
-        // All nodes in the embedded type tree were valid
-        Ok(())
-    }
-
     /// Go through a list of identifiers and ensure that all identifiers have
     /// unique names
     fn check_identifier_collision<T, F: Fn(&T) -> &Identifier>(
@@ -1086,10 +800,10 @@ impl TypeTable {
                 let other_item_ident = getter(other_item);
                 if item_ident == other_item_ident {
                     let module_source = &ctx.modules[root_id.index as usize].source;
-                    return Err(ParseError::new_error(
-                        module_source, item_ident.position, &format!("This {} is defined more than once", item_name)
-                    ).with_postfixed_info(
-                        module_source, other_item_ident.position, &format!("The other {} is defined here", item_name)
+                    return Err(ParseError::new_error_at_span(
+                        module_source, item_ident.span, format!("This {} is defined more than once", item_name)
+                    ).with_info_at_span(
+                        module_source, other_item_ident.span, format!("The other {} is defined here", item_name)
                     ));
                 }
             }
@@ -1110,11 +824,11 @@ impl TypeTable {
             for other_poly_arg in &poly_args[..arg_idx] {
                 if poly_arg == other_poly_arg {
                     let module_source = &ctx.modules[root_id.index as usize].source;
-                    return Err(ParseError::new_error(
-                        module_source, poly_arg.position,
+                    return Err(ParseError::new_error_str_at_span(
+                        module_source, poly_arg.span,
                         "This polymorphic argument is defined more than once"
                     ).with_postfixed_info(
-                        module_source, other_poly_arg.position,
+                        module_source, other_poly_arg.span,
                         "It conflicts with this polymorphic argument"
                     ));
                 }
@@ -1122,14 +836,15 @@ impl TypeTable {
             // Check if identifier conflicts with a symbol defined or imported
            // in the current module
-            if let Some(symbol) = ctx.symbols.resolve_symbol(root_id, &poly_arg.value) {
+            if let Some(symbol) = ctx.symbols.get_symbol_by_name(SymbolScope::Module(root_id), poly_arg.value.as_bytes()) {
                 // We have a conflict
                 let module_source = &ctx.modules[root_id.index as usize].source;
-                return Err(ParseError::new_error(
-                    module_source, poly_arg.position,
+                let introduction_span = symbol.variant.span_of_introduction(ctx.heap);
+                return Err(ParseError::new_error_str_at_span(
+                    module_source, poly_arg.span,
                     "This polymorphic argument conflicts with another symbol"
-                ).with_postfixed_info(
-                    module_source, symbol.position,
+                ).with_info_str_at_span(
+                    module_source, introduction_span,
                     "It conflicts due to this symbol"
                 ));
             }
@@ -1143,6 +858,23 @@ impl TypeTable {
     // Small utilities
     //--------------------------------------------------------------------------
 
+    fn create_polymorphic_variables(variables: &[Identifier]) -> Vec<PolymorphicVariable> {
+        let mut result = Vec::with_capacity(variables.len());
+        for variable in variables.iter() {
+            result.push(PolymorphicVariable{ identifier: variable.clone(), is_in_use: false });
+        }
+
+        result
+    }
+
+    fn mark_used_polymorphic_variables(poly_vars: &mut Vec<PolymorphicVariable>, parser_type: &ParserType) {
+        for element in &parser_type.elements {
+            if let ParserTypeVariant::PolymorphicArgument(_, idx) = &element.variant {
+                poly_vars[*idx].is_in_use = true;
+            }
+        }
+    }
+
     fn enum_tag_type(min_tag_value: i64, max_tag_value: i64) -> PrimitiveType {
         // TODO: @consistency tag values should be handled correctly
         debug_assert!(min_tag_value <= max_tag_value);
diff --git a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs
index db99c02361888c1e6884fdfeaf6c4e3d46cafb4a..7b48b763b7fff986f5c08aaed6f10250c317fab3 100644
--- a/src/protocol/parser/visitor_linker.rs
+++ b/src/protocol/parser/visitor_linker.rs
@@ -18,9 +18,9 @@ use super::visitor::{
 #[derive(PartialEq, Eq)]
 enum DefinitionType {
     None,
-    Primitive(ComponentId),
-    Composite(ComponentId),
-    Function(FunctionId)
+    Primitive(ComponentDefinitionId),
+    Composite(ComponentDefinitionId),
+    Function(FunctionDefinitionId)
 }
 
 impl DefinitionType {
@@ -79,8 +79,6 @@ pub(crate) struct ValidityAndLinkerVisitor {
     expression_buffer: Vec<ExpressionId>,
     // Yet another buffer, now with parser type IDs, similar to above
     parser_type_buffer: Vec<ParserTypeId>,
-    // Statements to insert after the breadth pass in a single block
-    insert_buffer: Vec<(u32, StatementId)>,
 }
 
 impl ValidityAndLinkerVisitor {
@@ -96,7 +94,6 @@ impl ValidityAndLinkerVisitor {
             statement_buffer: Vec::with_capacity(STMT_BUFFER_INIT_CAPACITY),
             expression_buffer: Vec::with_capacity(EXPR_BUFFER_INIT_CAPACITY),
             parser_type_buffer: Vec::with_capacity(TYPE_BUFFER_INIT_CAPACITY),
-            insert_buffer: Vec::with_capacity(32),
         }
     }
 
@@ -111,7 +108,6 @@ impl ValidityAndLinkerVisitor {
         self.statement_buffer.clear();
         self.expression_buffer.clear();
         self.parser_type_buffer.clear();
-        self.insert_buffer.clear();
     }
 
     /// Debug call to ensure that we didn't make any mistakes in any of the
@@ -120,7 +116,6 @@ impl ValidityAndLinkerVisitor {
         debug_assert!(self.statement_buffer.is_empty());
         debug_assert!(self.expression_buffer.is_empty());
         debug_assert!(self.parser_type_buffer.is_empty());
-        debug_assert!(self.insert_buffer.is_empty());
     }
 }
 
@@ -129,7 +124,7 @@ impl Visitor2 for ValidityAndLinkerVisitor {
     // Definition visitors
     //--------------------------------------------------------------------------
 
-    fn visit_component_definition(&mut self, ctx: &mut Ctx, id: ComponentId) -> VisitorResult {
+    fn visit_component_definition(&mut self, ctx: &mut Ctx, id: ComponentDefinitionId) -> VisitorResult {
         self.reset_state();
 
         self.def_type = match &ctx.heap[id].variant {
@@ -158,15 +153,15 @@ impl Visitor2 for ValidityAndLinkerVisitor {
         // Visit statements in component body
         let body_id = ctx.heap[id].body;
         self.performing_breadth_pass = true;
-        self.visit_stmt(ctx, body_id)?;
+        self.visit_block_stmt(ctx, body_id)?;
         self.performing_breadth_pass = false;
-        self.visit_stmt(ctx, body_id)?;
+        self.visit_block_stmt(ctx, body_id)?;
 
         self.check_post_definition_state();
         Ok(())
     }
 
-    fn visit_function_definition(&mut self, ctx: &mut Ctx, id: FunctionId) -> VisitorResult {
+    fn visit_function_definition(&mut self, ctx: &mut Ctx, id: FunctionDefinitionId) -> VisitorResult {
         self.reset_state();
 
         // Set internal statement indices
@@ -194,9 +189,9 @@ impl Visitor2 for ValidityAndLinkerVisitor {
         // Visit statements in function body
         let body_id = ctx.heap[id].body;
         self.performing_breadth_pass = true;
-        self.visit_stmt(ctx, body_id)?;
+        self.visit_block_stmt(ctx, body_id)?;
         self.performing_breadth_pass = false;
-        self.visit_stmt(ctx, body_id)?;
+        self.visit_block_stmt(ctx, body_id)?;
 
         self.check_post_definition_state();
         Ok(())
     }
@@ -262,20 +257,7
@@ impl Visitor2 for ValidityAndLinkerVisitor { } fn visit_if_stmt(&mut self, ctx: &mut Ctx, id: IfStatementId) -> VisitorResult { - if self.performing_breadth_pass { - let position = ctx.heap[id].position; - let end_if_id = ctx.heap.alloc_end_if_statement(|this| { - EndIfStatement { - this, - start_if: id, - position, - next: None, - } - }); - let stmt = &mut ctx.heap[id]; - stmt.end_if = Some(end_if_id); - self.insert_buffer.push((self.relative_pos_in_block + 1, end_if_id.upcast())); - } else { + if !self.performing_breadth_pass { // Traverse expression and bodies let (test_id, true_id, false_id) = { let stmt = &ctx.heap[id]; @@ -296,20 +278,8 @@ impl Visitor2 for ValidityAndLinkerVisitor { fn visit_while_stmt(&mut self, ctx: &mut Ctx, id: WhileStatementId) -> VisitorResult { if self.performing_breadth_pass { - let position = ctx.heap[id].position; - let end_while_id = ctx.heap.alloc_end_while_statement(|this| { - EndWhileStatement { - this, - start_while: id, - position, - next: None, - } - }); let stmt = &mut ctx.heap[id]; - stmt.end_while = Some(end_while_id); stmt.in_sync = self.in_sync.clone(); - - self.insert_buffer.push((self.relative_pos_in_block + 1, end_while_id.upcast())); } else { let (test_id, body_id) = { let stmt = &ctx.heap[id]; @@ -380,17 +350,6 @@ impl Visitor2 for ValidityAndLinkerVisitor { "Synchronous statements may only be used in primitive components" )); } - - // Append SynchronousEnd pseudo-statement - let sync_end_id = ctx.heap.alloc_end_synchronous_statement(|this| EndSynchronousStatement{ - this, - position: cur_sync_position, - start_sync: id, - next: None, - }); - let sync_start = &mut ctx.heap[id]; - sync_start.end_sync = Some(sync_end_id); - self.insert_buffer.push((self.relative_pos_in_block + 1, sync_end_id.upcast())); } else { let sync_body = ctx.heap[id].body; let old = self.in_sync.replace(id); @@ -1192,13 +1151,6 @@ impl ValidityAndLinkerVisitor { self.visit_stmt(ctx, self.statement_buffer[stmt_idx])?; } - if !self.insert_buffer.is_empty() { - let body = &mut ctx.heap[id]; - for (insert_idx, (pos, stmt)) in self.insert_buffer.drain(..).enumerate() { - body.statements.insert(pos as usize + insert_idx, stmt); - } - } - // And the depth pass. Because we're not actually visiting any inserted // nodes because we're using the statement buffer, we may safely use the // relative_pos_in_block counter. @@ -1212,140 +1164,11 @@ impl ValidityAndLinkerVisitor { self.relative_pos_in_block = old_relative_pos; // Pop statement buffer - debug_assert!(self.insert_buffer.is_empty(), "insert buffer not empty after depth pass"); self.statement_buffer.truncate(old_num_stmts); Ok(()) } - /// Visits a particular ParserType in the AST and resolves temporary and - /// implicitly inferred types into the appropriate tree. Note that a - /// ParserType node is a tree. Only call this function on the root node of - /// that tree to prevent doing work more than once. 
- fn visit_parser_type_without_buffer_cleanup(&mut self, ctx: &mut Ctx, id: ParserTypeId) -> VisitorResult { - use ParserTypeVariant as PTV; - debug_assert!(!self.performing_breadth_pass); - - let init_num_types = self.parser_type_buffer.len(); - self.parser_type_buffer.push(id); - - 'resolve_loop: while self.parser_type_buffer.len() > init_num_types { - let parser_type_id = self.parser_type_buffer.pop().unwrap(); - let parser_type = &ctx.heap[parser_type_id]; - - let (symbolic_pos, symbolic_variant, num_inferred_to_allocate) = match &parser_type.variant { - PTV::Message | PTV::Bool | - PTV::Byte | PTV::Short | PTV::Int | PTV::Long | - PTV::String | - PTV::IntegerLiteral | PTV::Inferred => { - // Builtin types or types that do not require recursion - continue 'resolve_loop; - }, - PTV::Array(subtype_id) | - PTV::Input(subtype_id) | - PTV::Output(subtype_id) => { - // Requires recursing - self.parser_type_buffer.push(*subtype_id); - continue 'resolve_loop; - }, - PTV::Symbolic(symbolic) => { - // Retrieve poly_vars from function/component definition to - // match against. - let (definition_id, poly_vars) = match self.def_type { - DefinitionType::None => unreachable!(), - DefinitionType::Primitive(id) => (id.upcast(), &ctx.heap[id].poly_vars), - DefinitionType::Composite(id) => (id.upcast(), &ctx.heap[id].poly_vars), - DefinitionType::Function(id) => (id.upcast(), &ctx.heap[id].poly_vars), - }; - - let mut symbolic_variant = None; - for (poly_var_idx, poly_var) in poly_vars.iter().enumerate() { - if symbolic.identifier.matches_identifier(poly_var) { - // Type refers to a polymorphic variable. - // TODO: @hkt Maybe allow higher-kinded types? - if symbolic.identifier.get_poly_args().is_some() { - return Err(ParseError::new_error( - &ctx.module.source, symbolic.identifier.position, - "Polymorphic arguments to a polymorphic variable (higher-kinded types) are not allowed (yet)" - )); - } - symbolic_variant = Some(SymbolicParserTypeVariant::PolyArg(definition_id, poly_var_idx)); - } - } - - if let Some(symbolic_variant) = symbolic_variant { - // Identifier points to a polymorphic argument - (symbolic.identifier.position, symbolic_variant, 0) - } else { - // Must be a user-defined type, otherwise an error - let (found_type, ident_iter) = find_type_definition( - &ctx.symbols, &ctx.types, ctx.module.root_id, &symbolic.identifier - ).as_parse_error(&ctx.module.source)?; - - // TODO: @function_ptrs: Allow function pointers at some - // point in the future - if found_type.definition.type_class().is_proc_type() { - return Err(ParseError::new_error( - &ctx.module.source, symbolic.identifier.position, - &format!( - "This identifier points to a {} type, expected a datatype", - found_type.definition.type_class() - ) - )); - } - - // If the type is polymorphic then we have two cases: if - // the programmer did not specify the polyargs then we - // assume we're going to infer all of them. Otherwise we - // make sure that they match in count. - let (_, poly_args) = ident_iter.prev().unwrap(); - let num_to_infer = match_polymorphic_args_to_vars( - found_type, poly_args, symbolic.identifier.position - ).as_parse_error(&ctx.heap, &ctx.module.source)?; - - ( - symbolic.identifier.position, - SymbolicParserTypeVariant::Definition(found_type.ast_definition), - num_to_infer - ) - } - } - }; - - // If here then type is symbolic, perform a mutable borrow (and do - // some rust shenanigans) to set the required information. 
- for _ in 0..num_inferred_to_allocate { - // TODO: @hack, not very user friendly to manually allocate - // `inferred` ParserTypes with the InputPosition of the - // symbolic type's identifier. - // We reuse the `parser_type_buffer` to temporarily store these - // and we'll take them out later - self.parser_type_buffer.push(ctx.heap.alloc_parser_type(|this| ParserType{ - this, - pos: symbolic_pos, - variant: ParserTypeVariant::Inferred, - })); - } - - if let PTV::Symbolic(symbolic) = &mut ctx.heap[parser_type_id].variant { - if num_inferred_to_allocate != 0 { - symbolic.poly_args2.reserve(num_inferred_to_allocate); - for _ in 0..num_inferred_to_allocate { - symbolic.poly_args2.push(self.parser_type_buffer.pop().unwrap()); - } - } else if !symbolic.identifier.poly_args.is_empty() { - symbolic.poly_args2.extend(&symbolic.identifier.poly_args); - self.parser_type_buffer.extend(&symbolic.poly_args2); - } - symbolic.variant = Some(symbolic_variant); - } else { - unreachable!(); - } - } - - Ok(()) - } - //-------------------------------------------------------------------------- // Utilities //-------------------------------------------------------------------------- @@ -1748,92 +1571,6 @@ impl ValidityAndLinkerVisitor { Ok(target) } - // TODO: @cleanup, merge with function below - fn visit_call_poly_args(&mut self, ctx: &mut Ctx, call_id: CallExpressionId) -> VisitorResult { - // TODO: @token Revisit when tokenizer is implemented - let call_expr = &mut ctx.heap[call_id]; - if let Method::Symbolic(symbolic) = &mut call_expr.method { - if let Some(poly_args) = symbolic.identifier.get_poly_args() { - call_expr.poly_args.extend(poly_args); - } - } - - let call_expr = &ctx.heap[call_id]; - - // Determine the polyarg signature - let num_expected_poly_args = match &call_expr.method { - Method::Create => { - 0 - }, - Method::Fires => { - 1 - }, - Method::Get => { - 1 - }, - Method::Put => { - 1 - } - Method::Symbolic(symbolic) => { - // Retrieve type and make sure number of specified polymorphic - // arguments is correct - - let definition = &ctx.heap[symbolic.definition.unwrap()]; - match definition { - Definition::Function(definition) => definition.poly_vars.len(), - Definition::Component(definition) => definition.poly_vars.len(), - _ => { - debug_assert!(false, "expected function or component definition while visiting call poly args"); - unreachable!(); - } - } - } - }; - - // We allow zero polyargs to imply all args are inferred. Otherwise the - // number of arguments must be equal - if call_expr.poly_args.is_empty() { - if num_expected_poly_args != 0 { - // Infer all polyargs - // TODO: @cleanup Not nice to use method position as implicitly - // inferred parser type pos. 
- let pos = call_expr.position(); - for _ in 0..num_expected_poly_args { - self.parser_type_buffer.push(ctx.heap.alloc_parser_type(|this| ParserType { - this, - pos, - variant: ParserTypeVariant::Inferred, - })); - } - - let call_expr = &mut ctx.heap[call_id]; - call_expr.poly_args.reserve(num_expected_poly_args); - for _ in 0..num_expected_poly_args { - call_expr.poly_args.push(self.parser_type_buffer.pop().unwrap()); - } - } - Ok(()) - } else if call_expr.poly_args.len() == num_expected_poly_args { - // Number of args is not 0, so parse all the specified ParserTypes - let old_num_types = self.parser_type_buffer.len(); - self.parser_type_buffer.extend(&call_expr.poly_args); - while self.parser_type_buffer.len() > old_num_types { - let parser_type_id = self.parser_type_buffer.pop().unwrap(); - self.visit_parser_type(ctx, parser_type_id)?; - } - self.parser_type_buffer.truncate(old_num_types); - Ok(()) - } else { - return Err(ParseError::new_error( - &ctx.module.source, call_expr.position, - &format!( - "Expected {} polymorphic arguments (or none, to infer them), but {} were specified", - num_expected_poly_args, call_expr.poly_args.len() - ) - )); - } - } - fn visit_literal_poly_args(&mut self, ctx: &mut Ctx, lit_id: LiteralExpressionId) -> VisitorResult { // TODO: @token Revisit when tokenizer is implemented let literal_expr = &mut ctx.heap[lit_id];
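
Taken together, the type-table half of this change replaces the old resolve-and-rewrite pass over embedded parser types with a much simpler bookkeeping step: build the definition's polymorphic variables up front, walk each parser type's flattened element list to mark the variables that are actually referenced, and derive `is_polymorph` from whether any variable ended up marked. The standalone Rust sketch below illustrates that flow; the types in it are simplified stand-ins that mirror the names used above, not the crate's real `ParserType`, `PolymorphicVariable`, or AST definitions.

// Simplified stand-ins for illustration only; not the crate's real types.
#[derive(Clone)]
struct Identifier { value: String }

struct PolymorphicVariable { identifier: Identifier, is_in_use: bool }

// A parser type is stored as a flat list of elements; a polymorphic argument
// refers back to its index in the definition's list of polymorphic variables.
enum ParserTypeVariant { PolymorphicArgument(usize), Other }
struct ParserTypeElement { variant: ParserTypeVariant }
struct ParserType { elements: Vec<ParserTypeElement> }

fn create_polymorphic_variables(variables: &[Identifier]) -> Vec<PolymorphicVariable> {
    variables.iter()
        .map(|v| PolymorphicVariable{ identifier: v.clone(), is_in_use: false })
        .collect()
}

fn mark_used_polymorphic_variables(poly_vars: &mut [PolymorphicVariable], parser_type: &ParserType) {
    for element in &parser_type.elements {
        if let ParserTypeVariant::PolymorphicArgument(idx) = element.variant {
            poly_vars[idx].is_in_use = true;
        }
    }
}

fn main() {
    // Something like `struct Pair<A, B> { first: A, second: u32 }`: only `A` is used.
    let poly_var_idents = vec![
        Identifier{ value: String::from("A") },
        Identifier{ value: String::from("B") },
    ];
    let mut poly_vars = create_polymorphic_variables(&poly_var_idents);

    let first_field_type = ParserType{
        elements: vec![ParserTypeElement{ variant: ParserTypeVariant::PolymorphicArgument(0) }],
    };
    let second_field_type = ParserType{
        elements: vec![ParserTypeElement{ variant: ParserTypeVariant::Other }],
    };
    mark_used_polymorphic_variables(&mut poly_vars, &first_field_type);
    mark_used_polymorphic_variables(&mut poly_vars, &second_field_type);

    let is_polymorph = poly_vars.iter().any(|v| v.is_in_use);
    assert!(is_polymorph);
    assert!(poly_vars[0].is_in_use && !poly_vars[1].is_in_use);
    println!("{} is declared but never used", poly_vars[1].identifier.value);
}

The `is_polymorph` flag computed this way is the same per-definition determination the table entries above store next to the (possibly unused) polymorphic variables.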