diff --git a/language_spec.md b/language_spec.md index 3af00076806801b64133a5c8527509989206d86f..0e45dbf4ac71ae535689eb6b144b172bfd766e79 100644 --- a/language_spec.md +++ b/language_spec.md @@ -101,7 +101,7 @@ keyword = "while" | "break" | "continue" | "return" | "synchronous" | "assert" | "goto" | "skip" | "new" | "let" -builtin = "put" | "get" | "fires" | "create" | "sizeof" | "assert" +builtin = "put" | "get" | "fires" | "create" | "assert" identifier = identifier-any WITHOUT (keyword | builtin) // Identifier with any number of prefixed namespaces diff --git a/src/protocol/arena.rs b/src/protocol/arena.rs index 2120f5d9b983c1af93ba6530e5a6565e15d101eb..86876643d3dd9c922c2fb748de5cc52403422e2f 100644 --- a/src/protocol/arena.rs +++ b/src/protocol/arena.rs @@ -52,6 +52,9 @@ impl Arena { pub fn iter(&self) -> impl Iterator { self.store.iter() } + pub fn len(&self) -> usize { + self.store.len() + } } impl core::ops::Index> for Arena { type Output = T; diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index f7a570318056ec6c02182e5188fc14bb97974fda..7845de8a6c71c50f035bfd9390611efe79831d2b 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -40,7 +40,7 @@ impl LocalId { } } -#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] pub struct DefinitionId(Id); #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] @@ -79,24 +79,6 @@ impl FunctionId { } } -#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct CompositeId(ComponentId); - -impl CompositeId { - pub fn upcast(self) -> ComponentId { - self.0 - } -} - -#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct PrimitiveId(ComponentId); - -impl PrimitiveId { - pub fn upcast(self) -> ComponentId { - self.0 - } -} - #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct StatementId(Id); @@ -406,7 +388,7 @@ impl ImportedDeclarationId { #[derive(Debug, serde::Serialize, serde::Deserialize)] pub struct Heap { // Allocators - // #[serde[skip]] string_alloc: StringAllocator, + // #[serde(skip)] string_alloc: StringAllocator, // Root arena, contains the entry point for different modules. Each root // contains lists of IDs that correspond to the other arenas. protocol_descriptions: Arena, @@ -733,19 +715,12 @@ impl Heap { Definition::Enum(f(EnumId(DefinitionId(id)))) }))) } - pub fn alloc_composite(&mut self, f: impl FnOnce(CompositeId) -> Composite) -> CompositeId { - CompositeId(ComponentId(DefinitionId(self.definitions.alloc_with_id(|id| { - Definition::Component(Component::Composite(f(CompositeId(ComponentId(DefinitionId( - id, - )))))) - })))) - } - pub fn alloc_primitive(&mut self, f: impl FnOnce(PrimitiveId) -> Primitive) -> PrimitiveId { - PrimitiveId(ComponentId(DefinitionId(self.definitions.alloc_with_id(|id| { - Definition::Component(Component::Primitive(f(PrimitiveId(ComponentId(DefinitionId( - id, - )))))) - })))) + pub fn alloc_component(&mut self, f: impl FnOnce(ComponentId) -> Component) -> ComponentId { + ComponentId(DefinitionId(self.definitions.alloc_with_id(|id| { + Definition::Component(f(ComponentId( + DefinitionId(id), + ))) + }))) } pub fn alloc_function(&mut self, f: impl FnOnce(FunctionId) -> Function) -> FunctionId { FunctionId(DefinitionId( @@ -864,20 +839,6 @@ impl Index for Heap { } } -impl Index for Heap { - type Output = Composite; - fn index(&self, index: CompositeId) -> &Self::Output { - &self.definitions[((index.0).0).0].as_composite() - } -} - -impl Index for Heap { - type Output = Primitive; - fn index(&self, index: PrimitiveId) -> &Self::Output { - &self.definitions[((index.0).0).0].as_primitive() - } -} - impl Index for Heap { type Output = Statement; fn index(&self, index: StatementId) -> &Self::Output { @@ -1199,6 +1160,16 @@ impl Root { } None } + pub fn get_declaration_namespaced(&self, h: &Heap, id: &NamespacedIdentifier) -> Option { + for declaration_id in self.declarations.iter() { + let declaration = &h[*declaration_id]; + // TODO: @fixme + if declaration.identifier().value == id.value { + return Some(*declaration_id); + } + } + None + } } impl SyntaxElement for Root { @@ -1323,7 +1294,7 @@ pub struct NamespacedIdentifier { } impl NamespacedIdentifier { - fn iter(&self) -> NamespacedIdentifierIter { + pub(crate) fn iter(&self) -> NamespacedIdentifierIter { NamespacedIdentifierIter{ value: &self.value, cur_offset: 0, @@ -1333,13 +1304,30 @@ impl NamespacedIdentifier { } } -struct NamespacedIdentifierIter<'a> { +impl PartialEq for NamespacedIdentifier { + fn eq(&self, other: &Self) -> bool { + return self.value == other.value + } +} +impl Eq for NamespacedIdentifier{} + +// TODO: Just keep ref to NamespacedIdentifier +pub(crate) struct NamespacedIdentifierIter<'a> { value: &'a Vec, cur_offset: usize, num_returned: u8, num_total: u8, } +impl<'a> NamespacedIdentifierIter<'a> { + pub(crate) fn num_returned(&self) -> u8 { + return self.num_returned; + } + pub(crate) fn num_remaining(&self) -> u8 { + return self.num_total - self.num_returned + } +} + impl<'a> Iterator for NamespacedIdentifierIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { @@ -1377,8 +1365,6 @@ impl Display for Identifier { } } -type TypeData = Vec; - #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum PrimitiveType { Input, @@ -1389,9 +1375,25 @@ pub enum PrimitiveType { Short, Int, Long, - Symbolic(TypeData), + Symbolic(PrimitiveSymbolic) +} + +// TODO: @cleanup, remove PartialEq implementations +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct PrimitiveSymbolic { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + // Phase 2: typing + pub(crate) definition: Option } +impl PartialEq for PrimitiveSymbolic { + fn eq(&self, other: &Self) -> bool { + self.identifier == other.identifier + } +} +impl Eq for PrimitiveSymbolic{} + #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct Type { pub primitive: PrimitiveType, @@ -1448,7 +1450,18 @@ impl Display for Type { } PrimitiveType::Symbolic(data) => { // Type data is in ASCII range. - write!(f, "{}", String::from_utf8_lossy(&data))?; + if let Some(id) = &data.definition { + write!( + f, "Symbolic({}, id: {})", + String::from_utf8_lossy(&data.identifier.value), + id.0.index + )?; + } else { + write!( + f, "Symbolic({}, id: Unresolved)", + String::from_utf8_lossy(&data.identifier.value) + )?; + } } } if self.array { @@ -1490,7 +1503,13 @@ pub enum Method { Get, Fires, Create, - Symbolic(Identifier), + Symbolic(MethodSymbolic) +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct MethodSymbolic { + pub(crate) identifier: NamespacedIdentifier, + pub(crate) definition: Option } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1673,17 +1692,11 @@ impl Definition { _ => panic!("Unable to cast `Definition` to `Function`"), } } - pub fn as_composite(&self) -> &Composite { - self.as_component().as_composite() - } - pub fn as_primitive(&self) -> &Primitive { - self.as_component().as_primitive() - } pub fn identifier(&self) -> &Identifier { match self { Definition::Struct(def) => &def.identifier, Definition::Enum(def) => &def.identifier, - Definition::Component(com) => com.identifier(), + Definition::Component(com) => &com.identifier, Definition::Function(fun) => &fun.identifier, } } @@ -1691,7 +1704,7 @@ impl Definition { // TODO: Fix this static EMPTY_VEC: Vec = Vec::new(); match self { - Definition::Component(com) => com.parameters(), + Definition::Component(com) => &com.parameters, Definition::Function(fun) => &fun.parameters, _ => &EMPTY_VEC, } @@ -1699,7 +1712,7 @@ impl Definition { pub fn body(&self) -> StatementId { // TODO: Fix this match self { - Definition::Component(com) => com.body(), + Definition::Component(com) => com.body, Definition::Function(fun) => fun.body, _ => panic!("cannot retrieve body (for EnumDefinition or StructDefinition)") } @@ -1771,87 +1784,24 @@ pub struct EnumDefinition { pub variants: Vec, } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Component { - Composite(Composite), - Primitive(Primitive), -} - -impl Component { - pub fn this(&self) -> ComponentId { - match self { - Component::Composite(com) => com.this.upcast(), - Component::Primitive(prim) => prim.this.upcast(), - } - } - pub fn as_composite(&self) -> &Composite { - match self { - Component::Composite(result) => result, - _ => panic!("Unable to cast `Component` to `Composite`"), - } - } - pub fn as_primitive(&self) -> &Primitive { - match self { - Component::Primitive(result) => result, - _ => panic!("Unable to cast `Component` to `Primitive`"), - } - } - fn identifier(&self) -> &Identifier { - match self { - Component::Composite(com) => &com.identifier, - Component::Primitive(prim) => &prim.identifier, - } - } - pub fn parameters(&self) -> &Vec { - match self { - Component::Composite(com) => &com.parameters, - Component::Primitive(prim) => &prim.parameters, - } - } - pub fn body(&self) -> StatementId { - match self { - Component::Composite(com) => com.body, - Component::Primitive(prim) => prim.body, - } - } -} - -impl SyntaxElement for Component { - fn position(&self) -> InputPosition { - match self { - Component::Composite(def) => def.position(), - Component::Primitive(def) => def.position(), - } - } -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct Composite { - pub this: CompositeId, - // Phase 1: parser - pub position: InputPosition, - pub identifier: Identifier, - pub parameters: Vec, - pub body: StatementId, -} - -impl SyntaxElement for Composite { - fn position(&self) -> InputPosition { - self.position - } +#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] +pub enum ComponentVariant { + Primitive, + Composite, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct Primitive { - pub this: PrimitiveId, +pub struct Component { + pub this: ComponentId, // Phase 1: parser pub position: InputPosition, + pub variant: ComponentVariant, pub identifier: Identifier, pub parameters: Vec, pub body: StatementId, } -impl SyntaxElement for Primitive { +impl SyntaxElement for Component { fn position(&self) -> InputPosition { self.position } @@ -1931,8 +1881,8 @@ impl Signature { // TODO: Fix this match &h[def] { Definition::Component(com) => Signature::Component(ComponentSignature { - identifier: com.identifier().clone(), // TODO: @fix - arity: Signature::convert_parameters(h, com.parameters()), + identifier: com.identifier.clone(), // TODO: @fix + arity: Signature::convert_parameters(h, &com.parameters), }), Definition::Function(fun) => Signature::Function(FunctionSignature { return_type: h[fun.return_type].the_type.clone(), diff --git a/src/protocol/containers.rs b/src/protocol/containers.rs index 348bbaaca7c70d11bedb9fbbc65a26bfcbc5eed6..d0a27eda76f4489eb051aa536b4ff07c57c491f3 100644 --- a/src/protocol/containers.rs +++ b/src/protocol/containers.rs @@ -1,6 +1,10 @@ /// Containers.rs /// /// Contains specialized containers for the parser/compiler +/// TODO: Actually implement, I really want to remove all of the identifier +/// allocations. + +use std::collections::LinkedList; const PAGE_SIZE: usize = 4096; @@ -12,12 +16,11 @@ struct StringPage { impl StringPage { fn new() -> Self{ - let res = Self{ + Self{ buffer: [0; PAGE_SIZE], remaining: PAGE_SIZE, next_page: None - }; - res + } } } @@ -31,6 +34,8 @@ pub(crate) struct StringAllocator { last_page: *mut StringPage, } +unsafe impl Send for StringAllocator {} + impl StringAllocator { pub(crate) fn new() -> StringAllocator { let mut page = Box::new(StringPage::new()); @@ -90,10 +95,14 @@ mod tests { assert!(alloc.first_page.next_page.is_none()); assert_eq!(alloc.first_page.as_ref() as *const StringPage, alloc.last_page); + // Insert and make page full, should not allocate another page yet let input = "I am a simple static string"; let filler = " ".repeat(PAGE_SIZE - input.len()); let ref_first = alloc.alloc(input.as_bytes()).expect("alloc first"); let ref_filler = alloc.alloc(filler.as_bytes()).expect("alloc filler"); + assert!(alloc.first_page.next_page.is_none()); + assert_eq!(alloc.first_page.as_ref() as *const StringPage, alloc.last_page); + let ref_second = alloc.alloc(input.as_bytes()).expect("alloc second"); assert!(alloc.first_page.next_page.is_some()); diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index ced2c6a589f46bf43faa5c5e8c21fbbf4ab5de72..4ae30955efb5e6734fd6cd28403ed5d316bc9bb2 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2,6 +2,7 @@ use crate::protocol::ast::*; use crate::protocol::inputsource::*; const MAX_LEVEL: usize = 128; +const MAX_NAMESPACES: u8 = 8; // only three levels are supported at the moment fn is_vchar(x: Option) -> bool { if let Some(c) = x { @@ -259,7 +260,7 @@ impl Lexer<'_> { } // Statement keywords - + // TODO: Clean up these functions fn has_statement_keyword(&self) -> bool { self.has_keyword(b"channel") || self.has_keyword(b"skip") @@ -272,7 +273,7 @@ impl Lexer<'_> { || self.has_keyword(b"assert") || self.has_keyword(b"goto") || self.has_keyword(b"new") - || self.has_keyword(b"put") + || self.has_keyword(b"put") // TODO: @fix, should be a function, even though it has sideeffects } fn has_type_keyword(&self) -> bool { self.has_keyword(b"in") @@ -290,6 +291,17 @@ impl Lexer<'_> { || self.has_keyword(b"create") || self.has_keyword(b"length") } + fn has_reserved(&self) -> bool { + self.has_statement_keyword() + || self.has_type_keyword() + || self.has_builtin_keyword() + || self.has_keyword(b"let") + || self.has_keyword(b"struct") + || self.has_keyword(b"enum") + || self.has_keyword(b"true") + || self.has_keyword(b"false") + || self.has_keyword(b"null") + } // Identifiers @@ -300,7 +312,7 @@ impl Lexer<'_> { let next = self.source.next(); is_ident_start(next) } - fn consume_identifier(&mut self, h: &mut Heap) -> Result { + fn consume_identifier(&mut self) -> Result { if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { return Err(self.error_at_pos("Expected identifier")); } @@ -315,9 +327,33 @@ impl Lexer<'_> { self.consume_ident()?; Ok(()) } + fn has_namespaced_identifier(&self) -> bool { + self.has_identifier() + } + fn consume_namespaced_identifier(&mut self) -> Result { + if self.has_reserved() { + return Err(self.error_at_pos("Encountered reserved keyword")); + } - // Types and type annotations + let position = self.source.pos(); + let mut ns_ident = self.consume_ident()?; + let mut num_namespaces = 1; + while self.has_string(b"::") { + if num_namespaces >= MAX_NAMESPACES { + return Err(self.error_at_pos("Too many namespaces in identifier")); + } + let new_ident = self.consume_ident()?; + num_namespaces += 1; + } + Ok(NamespacedIdentifier{ + position, + value: ns_ident, + num_namespaces, + }) + } + + // Types and type annotations fn consume_primitive_type(&mut self) -> Result { if self.has_keyword(b"in") { self.consume_keyword(b"in")?; @@ -343,9 +379,14 @@ impl Lexer<'_> { } else if self.has_keyword(b"long") { self.consume_keyword(b"long")?; Ok(PrimitiveType::Long) + } else if self.has_keyword(b"let") { + return Err(self.error_at_pos("inferred types using 'let' are reserved, but not yet implemented")); } else { - let data = self.consume_ident()?; - Ok(PrimitiveType::Symbolic(data)) + let identifier = self.consume_namespaced_identifier()?; + Ok(PrimitiveType::Symbolic(PrimitiveSymbolic{ + identifier, + definition: None + })) } } fn has_array(&mut self) -> bool { @@ -398,7 +439,7 @@ impl Lexer<'_> { let position = self.source.pos(); let type_annotation = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; let id = h.alloc_parameter(|this| Parameter { this, position, type_annotation, identifier }); Ok(id) @@ -979,7 +1020,7 @@ impl Lexer<'_> { self.consume_keyword(b"length")?; field = Field::Length; } else { - field = Field::Symbolic(self.consume_identifier(h)?); + field = Field::Symbolic(self.consume_identifier()?); } result = h .alloc_select_expression(|this| SelectExpression { @@ -1106,8 +1147,11 @@ impl Lexer<'_> { self.consume_keyword(b"create")?; method = Method::Create; } else { - let identifier = self.consume_identifier(h)?; - method = Method::Symbolic(identifier) + let identifier = self.consume_namespaced_identifier()?; + method = Method::Symbolic(MethodSymbolic{ + identifier, + definition: None + }) } self.consume_whitespace(false)?; let mut arguments = Vec::new(); @@ -1138,7 +1182,7 @@ impl Lexer<'_> { h: &mut Heap, ) -> Result { let position = self.source.pos(); - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; Ok(h.alloc_variable_expression(|this| VariableExpression { this, position, @@ -1277,12 +1321,12 @@ impl Lexer<'_> { self.consume_keyword(b"channel")?; self.consume_whitespace(true)?; let from_annotation = self.create_type_annotation_output(h)?; - let from_identifier = self.consume_identifier(h)?; + let from_identifier = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b"->")?; self.consume_whitespace(false)?; let to_annotation = self.create_type_annotation_input(h)?; - let to_identifier = self.consume_identifier(h)?; + let to_identifier = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b";")?; let from = h.alloc_local(|this| Local { @@ -1309,7 +1353,7 @@ impl Lexer<'_> { let position = self.source.pos(); let type_annotation = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b"=")?; self.consume_whitespace(false)?; @@ -1330,7 +1374,7 @@ impl Lexer<'_> { h: &mut Heap, ) -> Result { let position = self.source.pos(); - let label = self.consume_identifier(h)?; + let label = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b":")?; self.consume_whitespace(false)?; @@ -1389,7 +1433,7 @@ impl Lexer<'_> { self.consume_whitespace(false)?; let label; if self.has_identifier() { - label = Some(self.consume_identifier(h)?); + label = Some(self.consume_identifier()?); self.consume_whitespace(false)?; } else { label = None; @@ -1406,7 +1450,7 @@ impl Lexer<'_> { self.consume_whitespace(false)?; let label; if self.has_identifier() { - label = Some(self.consume_identifier(h)?); + label = Some(self.consume_identifier()?); self.consume_whitespace(false)?; } else { label = None; @@ -1477,7 +1521,7 @@ impl Lexer<'_> { let position = self.source.pos(); self.consume_keyword(b"goto")?; self.consume_whitespace(false)?; - let label = self.consume_identifier(h)?; + let label = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b";")?; Ok(h.alloc_goto_statement(|this| GotoStatement { this, position, label, target: None })) @@ -1549,7 +1593,7 @@ impl Lexer<'_> { let struct_pos = self.source.pos(); self.consume_keyword(b"struct")?; self.consume_whitespace(true)?; - let struct_ident = self.consume_identifier(h)?; + let struct_ident = self.consume_identifier()?; self.consume_whitespace(false)?; // Parse struct fields @@ -1567,7 +1611,7 @@ impl Lexer<'_> { let field_position = self.source.pos(); let field_type = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; - let field_ident = self.consume_identifier(h)?; + let field_ident = self.consume_identifier()?; self.consume_whitespace(false)?; fields.push(StructFieldDefinition{ @@ -1605,7 +1649,7 @@ impl Lexer<'_> { let enum_pos = self.source.pos(); self.consume_keyword(b"enum")?; self.consume_whitespace(true)?; - let enum_ident = self.consume_identifier(h)?; + let enum_ident = self.consume_identifier()?; self.consume_whitespace(false)?; // Parse enum variants @@ -1621,7 +1665,7 @@ impl Lexer<'_> { // Consume variant identifier self.consume_whitespace(false)?; let variant_position = self.source.pos(); - let variant_ident = self.consume_identifier(h)?; + let variant_ident = self.consume_identifier()?; self.consume_whitespace(false)?; // Consume variant (tag) value: may be nothing, in which case it is @@ -1685,41 +1729,46 @@ impl Lexer<'_> { })) } fn consume_component_definition(&mut self, h: &mut Heap) -> Result { + // TODO: Cleanup if self.has_keyword(b"composite") { - Ok(self.consume_composite_definition(h)?.upcast()) + Ok(self.consume_composite_definition(h)?) } else { - Ok(self.consume_primitive_definition(h)?.upcast()) + Ok(self.consume_primitive_definition(h)?) } } - fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { + fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"composite")?; self.consume_whitespace(true)?; - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; let mut parameters = Vec::new(); self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; let body = self.consume_block_statement(h)?; - Ok(h.alloc_composite(|this| Composite { this, position, identifier, parameters, body })) + Ok(h.alloc_component(|this| Component { + this, variant: ComponentVariant::Composite, position, identifier, parameters, body + })) } - fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { + fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"primitive")?; self.consume_whitespace(true)?; - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; let mut parameters = Vec::new(); self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; let body = self.consume_block_statement(h)?; - Ok(h.alloc_primitive(|this| Primitive { this, position, identifier, parameters, body })) + Ok(h.alloc_component(|this| Component { + this, variant: ComponentVariant::Primitive, position, identifier, parameters, body + })) } fn consume_function_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let return_type = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; - let identifier = self.consume_identifier(h)?; + let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; let mut parameters = Vec::new(); self.consume_parameters(h, &mut parameters)?; @@ -2072,6 +2121,15 @@ mod tests { assert_eq!(root.definitions.len(), 2); + let symbolic_type = |v: &PrimitiveType| -> Vec { + if let PrimitiveType::Symbolic(v) = v { + v.identifier.value.clone() + } else { + assert!(false); + unreachable!(); + } + }; + let foo_def = h[root.definitions[0]].as_struct(); assert_eq!(foo_def.identifier.value, b"Foo"); assert_eq!(foo_def.fields.len(), 3); @@ -2080,7 +2138,10 @@ mod tests { assert_eq!(foo_def.fields[1].field.value, b"two"); assert_eq!(h[foo_def.fields[1].the_type].the_type, Type::SHORT); assert_eq!(foo_def.fields[2].field.value, b"three"); - assert_eq!(h[foo_def.fields[2].the_type].the_type.primitive, PrimitiveType::Symbolic(Vec::from("Bar".as_bytes()))); + assert_eq!( + symbolic_type(&h[foo_def.fields[2].the_type].the_type.primitive), + Vec::from("Bar".as_bytes()) + ); let bar_def = h[root.definitions[1]].as_struct(); assert_eq!(bar_def.identifier.value, b"Bar"); @@ -2091,7 +2152,10 @@ mod tests { assert_eq!(h[bar_def.fields[1].the_type].the_type, Type::INT_ARRAY); assert_eq!(bar_def.fields[2].field.value, b"three"); assert_eq!(h[bar_def.fields[2].the_type].the_type.array, true); - assert_eq!(h[bar_def.fields[2].the_type].the_type.primitive, PrimitiveType::Symbolic(Vec::from("Qux".as_bytes()))); + assert_eq!( + symbolic_type(&h[bar_def.fields[2].the_type].the_type.primitive), + Vec::from("Qux".as_bytes()) + ); } #[test] @@ -2151,7 +2215,10 @@ mod tests { assert_eq!(enum_type(&qux_def.variants[0].value).the_type, Type::BYTE_ARRAY); assert_eq!(qux_def.variants[1].identifier.value, b"B"); assert_eq!(enum_type(&qux_def.variants[1].value).the_type.array, true); - assert_eq!(enum_type(&qux_def.variants[1].value).the_type.primitive, PrimitiveType::Symbolic(Vec::from("Bar".as_bytes()))); + if let PrimitiveType::Symbolic(t) = &enum_type(&qux_def.variants[1].value).the_type.primitive { + assert_eq!(t.identifier.value, Vec::from("Bar".as_bytes())); + } else { assert!(false) } + assert_eq!(qux_def.variants[2].identifier.value, b"C"); assert_eq!(enum_type(&qux_def.variants[2].value).the_type, Type::BYTE); } diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index 33452fcb8aac76ca1d17458a603226e0692c5eb6..b15f88d77adf7153c2cb1db3e40a392cf9c63908 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -30,10 +30,10 @@ pub(crate) trait Visitor: Sized { fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { recursive_component_definition(self, h, def) } - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { recursive_composite_definition(self, h, def) } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { recursive_primitive_definition(self, h, def) } fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { @@ -236,7 +236,7 @@ fn recursive_symbol_definition( match h[def].clone() { Definition::Struct(def) => this.visit_struct_definition(h, def.this), Definition::Enum(def) => this.visit_enum_definition(h, def.this), - Definition::Component(cdef) => this.visit_component_definition(h, cdef.this()), + Definition::Component(cdef) => this.visit_component_definition(h, cdef.this), Definition::Function(fdef) => this.visit_function_definition(h, fdef.this), } } @@ -246,16 +246,17 @@ fn recursive_component_definition( h: &mut Heap, def: ComponentId, ) -> VisitorResult { - match h[def].clone() { - Component::Composite(cdef) => this.visit_composite_definition(h, cdef.this), - Component::Primitive(pdef) => this.visit_primitive_definition(h, pdef.this), + let component_variant = h[def].variant; + match component_variant { + ComponentVariant::Primitive => this.visit_primitive_definition(h, def), + ComponentVariant::Composite => this.visit_composite_definition(h, def), } } fn recursive_composite_definition( this: &mut T, h: &mut Heap, - def: CompositeId, + def: ComponentId, ) -> VisitorResult { for ¶m in h[def].parameters.clone().iter() { recursive_parameter_as_variable(this, h, param)?; @@ -266,7 +267,7 @@ fn recursive_composite_definition( fn recursive_primitive_definition( this: &mut T, h: &mut Heap, - def: PrimitiveId, + def: ComponentId, ) -> VisitorResult { for ¶m in h[def].parameters.clone().iter() { recursive_parameter_as_variable(this, h, param)?; @@ -551,7 +552,7 @@ impl NestedSynchronousStatements { } impl Visitor for NestedSynchronousStatements { - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { assert!(!self.illegal); self.illegal = true; recursive_composite_definition(self, h, def)?; @@ -597,7 +598,7 @@ impl ChannelStatementOccurrences { } impl Visitor for ChannelStatementOccurrences { - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { assert!(!self.illegal); self.illegal = true; recursive_primitive_definition(self, h, def)?; @@ -695,7 +696,7 @@ impl Visitor for ComponentStatementReturnNew { self.illegal_return = false; Ok(()) } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { assert!(!self.illegal_new); self.illegal_new = true; recursive_primitive_definition(self, h, def)?; @@ -849,6 +850,15 @@ impl LinkCallExpressions { None => Err((id.position, "Unresolved method".to_string())), } } + fn get_declaration_namespaced( + &self, h: &Heap, id: &NamespacedIdentifier + ) -> Result { + // TODO: @fixme + match h[self.pd.unwrap()].get_declaration_namespaced(h, id) { + Some(id) => Ok(id), + None => Err((id.position, "Unresolved method".to_string())) + } + } } impl Visitor for LinkCallExpressions { @@ -858,7 +868,7 @@ impl Visitor for LinkCallExpressions { self.pd = None; Ok(()) } - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { assert!(!self.composite); self.composite = true; recursive_composite_definition(self, h, def)?; @@ -876,12 +886,12 @@ impl Visitor for LinkCallExpressions { fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { if let Method::Symbolic(id) = &h[expr].method { // TODO: @symbol_table - let decl = self.get_declaration(h, id)?; + let decl = self.get_declaration_namespaced(h, &id.identifier)?; if self.new_statement && h[decl].is_function() { - return Err((id.position, "Illegal call expression".to_string())); + return Err((id.identifier.position, "Illegal call expression".to_string())); } if !self.new_statement && h[decl].is_component() { - return Err((id.position, "Illegal call expression".to_string())); + return Err((id.identifier.position, "Illegal call expression".to_string())); } // Set the corresponding declaration of the call h[expr].declaration = Some(decl); diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index 9902584d39451161353cf41d5ace4cf7828e6f0e..e96cd2c815c3541895817b00f4f63ae961f587c5 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -1,3 +1,10 @@ +// TODO: Maybe allow namespaced-aliased imports. It is currently not possible +// to express the following: +// import Module.Submodule as SubMod +// import SubMod::{Symbol} +// And it is especially not possible to express the following: +// import SubMod::{Symbol} +// import Module.Submodule as SubMod use crate::protocol::ast::*; use crate::protocol::inputsource::*; @@ -18,11 +25,17 @@ pub(crate) enum Symbol { pub(crate) struct SymbolValue { // Position refers to the origin of the symbol definition (i.e. the module's // RootId that is in the key being used to lookup this value) - position: InputPosition, - symbol: Symbol, + pub(crate) position: InputPosition, + pub(crate) symbol: Symbol, } impl SymbolValue { + pub(crate) fn is_namespace(&self) -> bool { + match &self.symbol { + Symbol::Namespace(_) => true, + _ => false + } + } pub(crate) fn as_namespace(&self) -> Option { match &self.symbol { Symbol::Namespace(root_id) => Some(*root_id), @@ -48,10 +61,6 @@ impl SymbolValue { /// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate /// definition (i.e. not namespaces; as the language has no way to use /// namespaces except for using them in namespaced identifiers). -// TODO: Maybe allow namespaced-aliased imports. It is currently not possible -// to express the following: -// import Module.Submodule as ModSub -// import SubMod::{Symbol} pub(crate) struct SymbolTable { // Lookup from module name (not any aliases) to the root id module_lookup: HashMap, RootId>, @@ -283,8 +292,75 @@ impl SymbolTable { /// (i.e. an enum variant, or simply an erroneous instance of too many /// chained identifiers). This function will return None if nothing could be /// resolved at all. - pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) { - todo!("implement") + pub(crate) fn resolve_namespaced_symbol<'t, 'i>( + &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier + ) -> Option<(&SymbolValue, NamespacedIdentifierIter<'i>)> { + let mut iter = identifier.iter(); + let mut symbol: Option<&SymbolValue> = None; + let mut within_module_id = root_module_id; + while let Some(partial) = iter.next() { + // Lookup the symbol within the currently iterated upon module + let lookup_key = SymbolKey{ module_id: within_module_id, symbol_name: Vec::from(partial) }; + let new_symbol = self.symbol_lookup.get(&lookup_key); + + match new_symbol { + None => { + // Can't find anything + break; + }, + Some(new_symbol) => { + // Found something, but if we already moved to another + // module then we don't want to keep jumping across modules, + // we're only interested in symbols defined within that + // module. + match &new_symbol.symbol { + Symbol::Namespace(new_root_id) => { + if root_module_id != within_module_id { + // Don't jump from module to module, keep the + // old symbol (which must be a Namespace) and + // break + debug_assert!(symbol.is_some()); + debug_assert!(symbol.unwrap().is_namespace()); + debug_assert!(iter.num_returned() > 1); + + // For handling this error, we need to revert + // the iterator by one + let to_skip = iter.num_returned() - 1; + iter = identifier.iter(); + for _ in 0..to_skip { iter.next(); } + break; + } + + within_module_id = *new_root_id; + symbol = Some(new_symbol); + }, + Symbol::Definition((definition_root_id, _)) => { + // Found a definition, but if we already jumped + // modules, then this must be defined within that + // module. + if root_module_id != within_module_id && within_module_id != *definition_root_id { + // This is an imported definition within the module + // TODO: Maybe factor out? Dunno... + debug_assert!(symbol.is_some()); + debug_assert!(symbol.unwrap().is_namespace()); + debug_assert!(iter.num_returned() > 1); + let to_skip = iter.num_returned() - 1; + iter = identifier.iter(); + for _ in 0..to_skip { iter.next(); } + break; + } + symbol = Some(new_symbol); + break; + } + } + } + } + } + + match symbol { + None => None, + Some(symbol) => Some((symbol, iter)) + } } /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was @@ -292,7 +368,7 @@ impl SymbolTable { /// together with the previous definition's source position (in the origin /// module's source file). // Note: I would love to return a reference to the value, but Rust is - // preventing me from doing so... + // preventing me from doing so... That, or I'm not smart enough... fn add_namespace_symbol( &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId ) -> Result<(), InputPosition> { diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index ef4fddc7df0df7a303ced1f52a78c9f3c3a98053..0336aac3fc2aca20439941409f6bfb7733b56b87 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -1,11 +1,13 @@ // TODO: @fix PrimitiveType for enums/unions use crate::protocol::ast::*; -use crate::protocol::parser::symbol_table::*; +use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; +use crate::protocol::inputsource::*; +use crate::protocol::parser::*; use std::collections::HashMap; -enum DefinedType { +pub enum DefinedType { Enum(EnumType), Union(UnionType), Struct(StructType), @@ -14,7 +16,7 @@ enum DefinedType { } // TODO: Also support maximum u64 value -struct EnumVariant { +pub struct EnumVariant { identifier: Identifier, value: i64, } @@ -24,41 +26,536 @@ struct EnumVariant { /// compiler-defined, or a mix of the two. If a user assigns the same enum /// value multiple times, we assume the user is an expert and we consider both /// variants to be equal to one another. -struct EnumType { - definition: DefinitionId, +pub struct EnumType { variants: Vec, representation: PrimitiveType, } -struct UnionVariant { +pub struct UnionVariant { identifier: Identifier, - embedded_type: Option, + embedded_type: Option, tag_value: i64, } -struct UnionType { - definition: DefinitionId, - variants: Vec, +pub struct UnionType { + variants: Vec, tag_representation: PrimitiveType } -struct StructMemberType { +pub struct StructField { identifier: Identifier, + field_type: TypeAnnotationId, +} + +pub struct StructType { + fields: Vec, +} +pub struct FunctionArgument { + identifier: Identifier, + argument_type: TypeAnnotationId, } -struct StructType { +pub struct FunctionType { + return_type: Type, + arguments: Vec +} + +pub struct ComponentType { + variant: ComponentVariant, + arguments: Vec +} + +pub struct TypeTable { + lookup: HashMap, +} +enum LookupResult { + BuiltIn, + Resolved((RootId, DefinitionId)), + Unresolved((RootId, DefinitionId)), + Error((InputPosition, String)), } -struct FunctionType { +/// `TypeTable` is responsible for walking the entire lexed AST and laying out +/// the various user-defined types in terms of bytes and offsets. This process +/// may be pseudo-recursive (meaning: it is implemented in a recursive fashion, +/// but not in the recursive-function-call kind of way) in case a type depends +/// on another type to be resolved. +/// TODO: Distinction between resolved types and unresolved types (regarding the +/// mixed use of BuiltIns and resolved types) is a bit yucky at the moment, +/// will have to come up with something nice in the future. +/// TODO: Need to factor out the repeated lookup in some kind of state machine. +impl TypeTable { + fn new( + symbols: &SymbolTable, heap: &Heap, modules: &[LexedModule] + ) -> Result { + if cfg!(debug_assertions) { + for (index, module) in modules.iter().enumerate() { + debug_assert_eq!(index, module.root_id.0.index as usize) + } + } + + // Estimate total number of definitions we will encounter + let num_definitions = heap.definitions.len(); + let mut table = TypeTable{ + lookup: HashMap::with_capacity(num_definitions), + }; + + // Perform the breadcrumb-based type parsing. For now we do not allow + // cyclic types. + // TODO: Allow cyclic types. However, we want to have an implementation + // that is somewhat efficient: if a type is cyclic, then we have to + // insert a pointer somewhere. However, we don't want to insert them + // everywhere, for each possible type. Without any further context the + // decision to place a pointer somewhere is "random", we need to know + // how the type is used to have an informed opinion on where to place + // the pointer. + enum Breadcrumb { + Linear((usize, usize)), + Jumping((RootId, DefinitionId)) + } + + let mut module_index = 0; + let mut definition_index = 0; + let mut breadcrumbs = Vec::with_capacity(32); // if a user exceeds this, the user sucks at programming + while module_index < modules.len() { + // Go to next module if needed + { + let root = &heap[modules[module_index].root_id]; + if definition_index >= root.definitions.len() { + module_index += 1; + definition_index = 0; + continue; + } + } + + // Construct breadcrumbs in case we need to follow some types around + debug_assert!(breadcrumbs.is_empty()); + breadcrumbs.push(Breadcrumb::Linear((module_index, definition_index))); + 'resolve_loop: while !breadcrumbs.is_empty() { + // Retrieve module, the module's root and the definition + let (module, root, definition_id) = match breadcrumbs.last().unwrap() { + Breadcrumb::Linear((module_index, definition_index)) => { + let module = &modules[*module_index]; + let root = &heap[module.root_id]; + let definition_id = root.definitions[*definition_index]; + (module, root, definition_id) + }, + Breadcrumb::Jumping((root_id, definition_id)) => { + let module = &modules[root_id.0.index as usize]; + debug_assert_eq!(module.root_id, *root_id); + let root = &heap[*root_id]; + (module, root, *definition_id) + } + }; + + let definition = &heap[definition_id]; + + // Because we might have chased around to this particular + // definition before, we check if we haven't resolved the type + // already. + if table.lookup.contains_key(&definition_id) { + breadcrumbs.pop(); + continue; + } + + match definition { + Definition::Enum(definition) => { + // Check the definition to see if we're dealing with an + // enum or a union. If we find any union variants then + // we immediately check if the type is already resolved. + let mut has_tag_values = None; + let mut has_int_values = None; + for variant in &definition.variants { + match &variant.value { + EnumVariantValue::None => {}, + EnumVariantValue::Integer(_) => { + if has_int_values.is_none() { has_int_values = Some(variant.position); } + }, + EnumVariantValue::Type(variant_type) => { + if has_tag_values.is_none() { has_tag_values = Some(variant.position); } + + let variant_type = &heap[*variant_type]; + match lookup_type_definition( + heap, &table, symbols, module.root_id, + &variant_type.the_type.primitive + ) { + LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, + LookupResult::Unresolved(root_and_definition_id) => { + breadcrumbs.push(Breadcrumb::Jumping(root_and_definition_id)); + continue 'resolve_loop; + }, + LookupResult::Error((position, message)) => { + return Err(ParseError2::new_error(&module.source, position, &message)); + } + } + }, + } + } + + if has_tag_values.is_some() && has_int_values.is_some() { + // Not entirely illegal, but probably not desired + let tag_pos = has_tag_values.unwrap(); + let int_pos = has_int_values.unwrap(); + return Err( + ParseError2::new_error(&module.source, definition.position, "Illegal combination of enum integer variant(s) and enum union variant(s)") + .with_postfixed_info(&module.source, int_pos, "Explicitly assigning an integer value here") + .with_postfixed_info(&module.source, tag_pos, "Explicitly declaring a union variant here") + ) + } + + // If here, then the definition is a valid discriminated + // union with all of its types resolved, or a valid + // enum. + // Decide whether to implement as enum or as union + let is_union = has_tag_values.is_some(); + if is_union { + // Implement as discriminated union. Because we + // checked the availability of types above, we are + // safe to lookup type definitions + let mut tag_value = -1; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let embedded_type = match &variant.value { + EnumVariantValue::None => { + None + }, + EnumVariantValue::Type(type_annotation_id) => { + // Type should be resolvable, we checked this above + let type_annotation = &heap[*type_annotation_id]; + // TODO: Remove the assert once I'm clear on how to layout "the types" of types + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); + } + + Some(*type_annotation_id) + }, + EnumVariantValue::Integer(_) => { + debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); + unreachable!(); + } + }; + + variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + embedded_type, + tag_value, + }) + } + + table.add_definition(definition_id, DefinedType::Union(UnionType{ + variants, + tag_representation: enum_representation(tag_value) + })); + } else { + // Implement as regular enum + let mut enum_value = -1; // TODO: allow u64 max size + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + enum_value += 1; + match &variant.value { + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Type(_) => { + debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); + unreachable!(); + } + } + } + + table.add_definition(definition_id, DefinedType::Enum(EnumType{ + variants, + representation: enum_representation(enum_value), + })); + } + }, + Definition::Struct(definition) => { + // Before we start allocating fields, make sure we can + // actually resolve all of the field types + for field_definition in &definition.fields { + let type_definition = &heap[field_definition.the_type]; + match lookup_type_definition( + heap, &table, symbols, module.root_id, + &type_definition.the_type.primitive + ) { + LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, + LookupResult::Unresolved(root_and_definition_id) => { + breadcrumbs.push(Breadcrumb::Jumping(root_and_definition_id)); + continue 'resolve_loop; + }, + LookupResult::Error((position, message)) => { + return Err(ParseError2::new_error(&module.source, position, &message)); + } + } + } + + // We can resolve everything + let mut fields = Vec::with_capacity(definition.fields.len()); + for field_definition in &definition.fields { + let type_annotation = &heap[field_definition.the_type]; + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); + } + + fields.push(StructField{ + identifier: field_definition.field.clone(), + field_type: field_definition.the_type + }); + } + + table.add_definition(definition_id, DefinedType::Struct(StructType{ + fields, + })); + }, + Definition::Component(definition) => { + // As always, ensure all parameter types are resolved + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + match lookup_type_definition( + heap, &table, symbols, module.root_id, + &type_definition.the_type.primitive + ) { + LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, + LookupResult::Unresolved(root_and_definition_id) => { + breadcrumbs.push(Breadcrumb::Jumping(root_and_definition_id)); + continue 'resolve_loop; + }, + LookupResult::Error((position, message)) => { + return Err(ParseError2::new_error(&module.source, position, &message)); + } + } + } + + // We can resolve everything + let mut parameters = Vec::with_capacity(definition.parameters.len()); + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_definition.the_type.primitive); + } + + parameters.push(FunctionArgument{ + identifier: parameter.identifier.clone(), + argument_type: parameter.type_annotation, + }); + } + + table.add_definition(definition_id, DefinedType::Component(ComponentType{ + variant: definition.variant, + arguments: parameters, // Arguments, parameters, tomayto, tomahto + })); + }, + Definition::Function(definition) => { + // TODO: Onto the last one! + }, + } + + // If here, then we layed out the current type definition under + // investigation, so: + debug_assert!(!breadcrumbs.is_empty()); + breadcrumbs.pop(); + } + + // Go to next definition + definition_index += 1; + } + + debug_assert_eq!( + num_definitions, table.lookup.len(), + "expected {} (reserved) definitions in table, got {}", + num_definitions, table.lookup.len() + ); + + Ok(table) + } + + pub(crate) fn get_definition(&self, definition_id: &DefinitionId) -> Option<&DefinedType> { + self.lookup.get(definition_id) + } + + fn add_definition(&mut self, definition_id: DefinitionId, definition: DefinedType) { + debug_assert!(!self.lookup.contains_key(&definition_id), "already added definition"); + self.lookup.insert(definition_id, definition); + } } -struct ComponentType { +/// Attempts to lookup a type definition using a namespaced identifier. We have +/// three success cases: the first is simply that the type is a `BuiltIn` type. +/// In the two other cases both we find the definition of the type in the symbol +/// table, but in one case it is already `Resolved` in the type table, and in +/// the other case it is `Unresolved`. In this last case the type has to be +/// resolved before we're able to use it in the `TypeTable` construction +/// algorithm. +/// In the `Error` case something goes wrong with resolving the type. The error +/// message aims to be as helpful as possible to the user. +/// The caller should ensure that the `module_root` is where the `identifier` +/// lives. +fn lookup_type_definition( + heap: &Heap, types: &TypeTable, symbols: &SymbolTable, + module_root: RootId, type_to_resolve: &PrimitiveType +) -> LookupResult { + if let PrimitiveType::Symbolic(type_to_resolve) = type_to_resolve { + let identifier = &type_to_resolve.identifier; + match symbols.resolve_namespaced_symbol(module_root, identifier) { + None => { + // Failed to find anything at all + LookupResult::Error((identifier.position, String::from("Unknown type"))) + }, + Some((symbol, mut identifier_iter)) => { + match symbol.symbol { + Symbol::Namespace(_root_id) => { + // Reference to a namespace, which is not a type. However, + // the error message depends on whether we have identifiers + // remaining or not + if identifier_iter.num_remaining() == 0 { + LookupResult::Error(( + identifier.position, + String::from("Expected a type, got a module name") + )) + } else { + let next_identifier = identifier_iter.next().unwrap(); + LookupResult::Error(( + identifier.position, + format!("Cannot find symbol '{}' in this module", String::from_utf8_lossy(next_identifier)) + )) + } + }, + Symbol::Definition((definition_root_id, definition_id)) => { + // Got a definition, but we may also have more identifier's + // remaining in the identifier iterator + let definition = &heap[definition_id]; + if identifier_iter.num_remaining() == 0 { + // See if the type is resolved, and make sure it is + // a non-function, non-component type. Ofcourse + // these are valid types, but we cannot (yet?) use + // them as function arguments, struct fields or enum + // variants + match definition { + Definition::Component(definition) => { + return LookupResult::Error(( + identifier.position, + format!( + "Cannot use the component '{}' as an embedded type", + String::from_utf8_lossy(&definition.identifier.value) + ) + )); + }, + Definition::Function(definition) => { + return LookupResult::Error(( + identifier.position, + format!( + "Cannot use the function '{}' as an embedded type", + String::from_utf8_lossy(&definition.identifier.value) + ) + )); + }, + Definition::Enum(_) | Definition::Struct(_) => {} + } + + return if types.lookup.contains_key(&definition_id) { + LookupResult::Resolved((definition_root_id, definition_id)) + } else { + LookupResult::Unresolved((definition_root_id, definition_id)) + } + } else if identifier_iter.num_remaining() == 1 { + // This is always invalid, but if the type is an + // enumeration or a union then we want to return a + // different error message. + if definition.is_enum() { + let last_identifier = identifier_iter.next().unwrap(); + return LookupResult::Error(( + identifier.position, + format!( + "Expected a type, but got a (possible) enum variant '{}'. Only the enum '{}' itself can be used as a type", + String::from_utf8_lossy(last_identifier), + String::from_utf8_lossy(&definition.identifier().value) + ) + )); + } + } + + // Too much identifiers (>1) for an enumeration, or not an + // enumeration and we had more identifiers remaining + LookupResult::Error(( + identifier.position, + format!( + "Unknown type '{}', did you mean to use '{}'?", + String::from_utf8_lossy(&identifier.value), + String::from_utf8_lossy(&definition.identifier().value) + ) + )) + } + } + } + } + } else { + LookupResult::BuiltIn + } } -struct TypeTable { - lookup: HashMap, +/// Debugging function to ensure a type is resolved (calling +/// `lookup_type_definition` and ensuring its return value is `BuiltIn` or +/// `Resolved` +#[cfg(debug_assertions)] +fn ensure_type_definition( + heap: &Heap, types: &TypeTable, symbols: &SymbolTable, + module_root: RootId, type_to_resolve: &PrimitiveType +) { + match lookup_type_definition(heap, types, symbols, module_root, type_to_resolve) { + LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, + LookupResult::Unresolved((_, definition_id)) => { + assert!( + false, + "Expected that type definition for {} was resolved by the type table, but it wasn't", + String::from_utf8_lossy(&heap[definition_id].identifier().value) + ) + }, + LookupResult::Error((_, error)) => { + let message = if let PrimitiveType::Symbolic(symbolic) = type_to_resolve { + format!( + "Expected that type definition for {} was resolved by the type table, but it returned: {}", + String::from_utf8_lossy(&symbolic.identifier.value), &error + ) + } else { + format!( + "Expected (non-symbolic!?) type definition to be resolved, but it returned: {}", + &error + ) + }; + assert!(false, "{}", message) + } + } +} + +/// Determines an enumeration's integer representation type, or a union's tag +/// type, using the maximum value of the tag. The returned type is always a +/// builtin type. +/// TODO: Fix for maximum u64 value +fn enum_representation(max_tag_value: i64) -> PrimitiveType { + if max_tag_value <= u8::max_value() as i64 { + PrimitiveType::Byte + } else if max_tag_value <= u16::max_value() as i64 { + PrimitiveType::Short + } else if max_tag_value <= u32::max_value() as i64 { + PrimitiveType::Int + } else { + PrimitiveType::Long + } } \ No newline at end of file