diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index d81f79855f2d874722ac8ac8596ef90cbb493b89..03c0c16a470160e0af118fd5a909495f38d34fbb 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -471,14 +471,7 @@ impl IndexMut for Heap { impl Index for Heap { type Output = ParserType; fn index(&self, index: ParserTypeId) -> &Self::Output { - &self.parser_types[index.index] - } -} - -impl Index for Heap { - type Output = TypeAnnotation; - fn index(&self, index: TypeAnnotationId) -> &Self::Output { - &self.type_annotations[index] + &self.parser_types[index] } } @@ -1037,6 +1030,7 @@ impl Display for Identifier { /// TODO: @cleanup Maybe handle this differently, preallocate in heap? The /// reason I'm handling it like this now is so we don't allocate types in /// the `Arena` structure if they're the common types defined here. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum ParserTypeVariant { // Basic builtin Message, @@ -1059,7 +1053,7 @@ pub enum ParserTypeVariant { impl ParserTypeVariant { pub(crate) fn supports_polymorphic_args(&self) -> bool { use ParserTypeVariant::*; - match ParserTypeVariant { + match self { Message | Bool | Byte | Short | Int | Long | String | IntegerLiteral | Inferred => false, _ => true } @@ -1070,7 +1064,7 @@ impl ParserTypeVariant { /// linker/validator phase of the compilation process. These types may be /// (partially) inferred or represent literals (e.g. a integer whose bytesize is /// not yet determined). -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ParserType { pub this: ParserTypeId, pub pos: InputPosition, @@ -1091,7 +1085,8 @@ pub struct SymbolicParserType { /// need to be inferred. 
Otherwise the number of polymorphic arguments must /// match those of the corresponding definition pub poly_args: Vec, - // Phase 2: validation/linking + // Phase 2: validation/linking (for types in function/component bodies) and + // type table construction (for embedded types of structs/unions) pub variant: Option } @@ -1211,20 +1206,6 @@ impl Display for Type { } } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct TypeAnnotation { - pub this: TypeAnnotationId, - // Phase 1: parser - pub position: InputPosition, - pub the_type: Type, -} - -impl SyntaxElement for TypeAnnotation { - fn position(&self) -> InputPosition { - self.position - } -} - type CharacterData = Vec; type IntegerData = i64; @@ -1348,12 +1329,6 @@ impl Variable { _ => panic!("Unable to cast 'Variable' to 'Local'"), } } - pub fn the_type<'b>(&self, h: &'b Heap) -> &'b Type { - match self { - Variable::Parameter(param) => &h[param.type_annotation].the_type, - Variable::Local(local) => &h[local.type_annotation].the_type, - } - } } impl SyntaxElement for Variable { @@ -1582,68 +1557,68 @@ impl SyntaxElement for Function { self.position } } - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Signature { - Component(ComponentSignature), - Function(FunctionSignature), -} - -impl Signature { - pub fn from_definition(h: &Heap, def: DefinitionId) -> Signature { - // TODO: Fix this - match &h[def] { - Definition::Component(com) => Signature::Component(ComponentSignature { - identifier: com.identifier.clone(), // TODO: @fix - arity: Signature::convert_parameters(h, &com.parameters), - }), - Definition::Function(fun) => Signature::Function(FunctionSignature { - return_type: h[fun.return_type].the_type.clone(), - identifier: fun.identifier.clone(), // TODO: @fix - arity: Signature::convert_parameters(h, &fun.parameters), - }), - _ => panic!("cannot retrieve signature (for StructDefinition or EnumDefinition)") - } - } - fn convert_parameters(h: &Heap, params: &Vec) -> Vec { - let mut result = Vec::new(); - for ¶m in params.iter() { - result.push(h[h[param].type_annotation].the_type.clone()); - } - result - } - fn identifier(&self) -> &Identifier { - match self { - Signature::Component(com) => &com.identifier, - Signature::Function(fun) => &fun.identifier, - } - } - pub fn is_component(&self) -> bool { - match self { - Signature::Component(_) => true, - Signature::Function(_) => false, - } - } - pub fn is_function(&self) -> bool { - match self { - Signature::Component(_) => false, - Signature::Function(_) => true, - } - } -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct ComponentSignature { - pub identifier: Identifier, - pub arity: Vec, -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct FunctionSignature { - pub return_type: Type, - pub identifier: Identifier, - pub arity: Vec, -} +// TODO: @remove ??? 
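For reference, a minimal sketch (assuming `alloc_parser_type` and a cloneable `InputPosition`, both used elsewhere in this patch) of how a declared type such as `in<msg[]>` ends up as a small heap-allocated tree of `ParserType` nodes:

```rust
// Sketch: building the ParserType tree for `in<msg[]>`. Each node lives in
// the Heap arena and refers to its child by ParserTypeId.
fn example_in_msg_array(heap: &mut Heap, pos: InputPosition) -> ParserTypeId {
    let msg = heap.alloc_parser_type(|this| ParserType {
        this, pos: pos.clone(), variant: ParserTypeVariant::Message,
    });
    let array = heap.alloc_parser_type(|this| ParserType {
        this, pos: pos.clone(), variant: ParserTypeVariant::Array(msg),
    });
    heap.alloc_parser_type(|this| ParserType {
        this, pos, variant: ParserTypeVariant::Input(array),
    })
}
```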
+// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +// pub enum Signature { +// Component(ComponentSignature), +// Function(FunctionSignature), +// } +// +// impl Signature { +// pub fn from_definition(h: &Heap, def: DefinitionId) -> Signature { +// // TODO: Fix this +// match &h[def] { +// Definition::Component(com) => Signature::Component(ComponentSignature { +// identifier: com.identifier.clone(), // TODO: @fix +// arity: Signature::convert_parameters(h, &com.parameters), +// }), +// Definition::Function(fun) => Signature::Function(FunctionSignature { +// return_type: h[fun.return_type].the_type.clone(), +// identifier: fun.identifier.clone(), // TODO: @fix +// arity: Signature::convert_parameters(h, &fun.parameters), +// }), +// _ => panic!("cannot retrieve signature (for StructDefinition or EnumDefinition)") +// } +// } +// fn convert_parameters(h: &Heap, params: &Vec) -> Vec { +// let mut result = Vec::new(); +// for ¶m in params.iter() { +// result.push(h[h[param].type_annotation].the_type.clone()); +// } +// result +// } +// fn identifier(&self) -> &Identifier { +// match self { +// Signature::Component(com) => &com.identifier, +// Signature::Function(fun) => &fun.identifier, +// } +// } +// pub fn is_component(&self) -> bool { +// match self { +// Signature::Component(_) => true, +// Signature::Function(_) => false, +// } +// } +// pub fn is_function(&self) -> bool { +// match self { +// Signature::Component(_) => false, +// Signature::Function(_) => true, +// } +// } +// } +// +// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +// pub struct ComponentSignature { +// pub identifier: Identifier, +// pub arity: Vec, +// } +// +// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +// pub struct FunctionSignature { +// pub return_type: Type, +// pub identifier: Identifier, +// pub arity: Vec, +// } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Statement { diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index 2bd329753b8eb5b381484b4f8d1a6948f0b739d4..74e40d627796dbb44bf0397b49547a060bc1564c 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -289,7 +289,7 @@ impl ASTWriter { .with_s_key("Parameter"); self.kv(indent4).with_s_key("Name").with_ascii_val(¶m.identifier.value); - self.kv(indent4).with_s_key("Type").with_custom_val(|w| write_type(w, &heap[param.type_annotation])); + self.kv(indent4).with_s_key("Type").with_custom_val(|w| write_type(w, heap, &heap[param.parser_type])); } self.kv(indent2).with_s_key("Body"); @@ -613,7 +613,7 @@ impl ASTWriter { self.kv(indent2).with_s_key("Name").with_ascii_val(&local.identifier.value); self.kv(indent2).with_s_key("Type") - .with_custom_val(|w| write_type(w, &heap[local.type_annotation])); + .with_custom_val(|w| write_type(w, heap, &heap[local.parser_type])); } //-------------------------------------------------------------------------- @@ -638,24 +638,35 @@ fn write_option(target: &mut String, value: Option) { }; } -fn write_type(target: &mut String, t: &TypeAnnotation) { - match &t.the_type.primitive { - PrimitiveType::Input => target.write_str("in"), - PrimitiveType::Output => target.write_str("out"), - PrimitiveType::Message => target.write_str("msg"), - PrimitiveType::Boolean => target.write_str("bool"), - PrimitiveType::Byte => target.write_str("byte"), - PrimitiveType::Short => target.write_str("short"), - PrimitiveType::Int => target.write_str("int"), - PrimitiveType::Long => target.write_str("long"), - 
PrimitiveType::Symbolic(symbolic) => { - let mut temp = String::new(); - write_option(&mut temp, symbolic.definition.map(|v| v.index)); - write!(target, "Symbolic(name: {}, target: {})", String::from_utf8_lossy(&symbolic.identifier.value), &temp) +fn write_type(target: &mut String, heap: &Heap, t: &ParserType) { + use ParserTypeVariant as PTV; + + let mut embedded = Vec::new(); + match &t.variant { + PTV::Input(id) => { target.write_str("in"); embedded.push(*id); } + PTV::Output(id) => { target.write_str("out"); embedded.push(*id) } + PTV::Array(id) => { target.write_str("array"); embedded.push(*id) } + PTV::Message => { target.write_str("msg"); } + PTV::Bool => { target.write_str("bool"); } + PTV::Byte => { target.write_str("byte"); } + PTV::Short => { target.write_str("short"); } + PTV::Int => { target.write_str("int"); } + PTV::Long => { target.write_str("long"); } + PTV::String => { target.write_str("str"); } + PTV::IntegerLiteral => { target.write_str("int_lit"); } + PTV::Inferred => { target.write_str("auto"); } + PTV::Symbolic(symbolic) => { + target.write_str(&String::from_utf8_lossy(&symbolic.identifier.value)); + embedded.extend(&symbolic.poly_args); } }; - if t.the_type.array { - target.push_str("[]"); + if !embedded.is_empty() { + target.write_str("<"); + for (idx, embedded_id) in embedded.into_iter().enumerate() { + if idx != 0 { target.write_str(", "); } + write_type(target, heap, &heap[embedded_id]); + } + target.write_str(">"); } } \ No newline at end of file diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index 60534eb7e08209072928c3d672a4c2c3c4c8bf61..e8e3c57d5d6e35c67d88aca09a9b2dfc626e7737 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -21,9 +21,13 @@ const MESSAGE_MAX_LENGTH: i64 = SHORT_MAX; const ONE: Value = Value::Byte(ByteValue(1)); +// TODO: All goes one day anyway, so dirty typechecking hack trait ValueImpl { fn exact_type(&self) -> Type; - fn is_type_compatible(&self, t: &Type) -> bool; + fn is_type_compatible(&self, h: &Heap, t: &ParserType) -> bool { + Self::is_type_compatible_hack(h, t) + } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool; } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -838,26 +842,27 @@ impl ValueImpl for Value { Value::LongArray(val) => val.exact_type(), } } - fn is_type_compatible(&self, t: &Type) -> bool { + fn is_type_compatible(&self, h: &Heap, t: &ParserType) -> bool { match self { - Value::Input(val) => val.is_type_compatible(t), - Value::Output(val) => val.is_type_compatible(t), - Value::Message(val) => val.is_type_compatible(t), - Value::Boolean(val) => val.is_type_compatible(t), - Value::Byte(val) => val.is_type_compatible(t), - Value::Short(val) => val.is_type_compatible(t), - Value::Int(val) => val.is_type_compatible(t), - Value::Long(val) => val.is_type_compatible(t), - Value::InputArray(val) => val.is_type_compatible(t), - Value::OutputArray(val) => val.is_type_compatible(t), - Value::MessageArray(val) => val.is_type_compatible(t), - Value::BooleanArray(val) => val.is_type_compatible(t), - Value::ByteArray(val) => val.is_type_compatible(t), - Value::ShortArray(val) => val.is_type_compatible(t), - Value::IntArray(val) => val.is_type_compatible(t), - Value::LongArray(val) => val.is_type_compatible(t), - } - } + Value::Input(_) => InputValue::is_type_compatible_hack(h, t), + Value::Output(_) => OutputValue::is_type_compatible_hack(h, t), + Value::Message(_) => MessageValue::is_type_compatible_hack(h, t), + Value::Boolean(_) => BooleanValue::is_type_compatible_hack(h, t), 
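The `ValueImpl` change above leans on a small delegation trick: the provided `is_type_compatible` forwards to an associated function, so the array wrappers can reuse their element type's check without needing a value of that type. A self-contained sketch of the same shape, with made-up names and string-typed "declared types" standing in for `ParserType`:

```rust
// Illustrative only: the provided method forwards to an associated function,
// and the array wrapper peels one layer before reusing the element check.
trait Compatible {
    fn compatible(&self, declared: &str) -> bool { Self::compatible_static(declared) }
    fn compatible_static(declared: &str) -> bool;
}

struct ByteVal;
impl Compatible for ByteVal {
    // a byte value may be stored in any integer type at least as wide
    fn compatible_static(declared: &str) -> bool {
        matches!(declared, "byte" | "short" | "int" | "long")
    }
}

struct ByteArrayVal;
impl Compatible for ByteArrayVal {
    // strip the array suffix, then defer to the element type's rule
    fn compatible_static(declared: &str) -> bool {
        declared.strip_suffix("[]").map_or(false, ByteVal::compatible_static)
    }
}
```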
+ Value::Byte(_) => ByteValue::is_type_compatible_hack(h, t),
+ Value::Short(_) => ShortValue::is_type_compatible_hack(h, t),
+ Value::Int(_) => IntValue::is_type_compatible_hack(h, t),
+ Value::Long(_) => LongValue::is_type_compatible_hack(h, t),
+ Value::InputArray(_) => InputArrayValue::is_type_compatible_hack(h, t),
+ Value::OutputArray(_) => OutputArrayValue::is_type_compatible_hack(h, t),
+ Value::MessageArray(_) => MessageArrayValue::is_type_compatible_hack(h, t),
+ Value::BooleanArray(_) => BooleanArrayValue::is_type_compatible_hack(h, t),
+ Value::ByteArray(_) => ByteArrayValue::is_type_compatible_hack(h, t),
+ Value::ShortArray(_) => ShortArrayValue::is_type_compatible_hack(h, t),
+ Value::IntArray(_) => IntArrayValue::is_type_compatible_hack(h, t),
+ Value::LongArray(_) => LongArrayValue::is_type_compatible_hack(h, t),
+ }
+ }
+ fn is_type_compatible_hack(_h: &Heap, _t: &ParserType) -> bool { false }
}

impl Display for Value {
@@ -898,15 +903,8 @@ impl ValueImpl for InputValue {
fn exact_type(&self) -> Type {
Type::INPUT
}
- fn is_type_compatible(&self, t: &Type) -> bool {
- let Type { primitive, array } = t;
- if *array {
- return false;
- }
- match primitive {
- PrimitiveType::Input => true,
- _ => false,
- }
+ fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool {
+ return if let ParserTypeVariant::Input(_) = t.variant { true } else { false }
}
}

@@ -923,15 +921,8 @@ impl ValueImpl for OutputValue {
fn exact_type(&self) -> Type {
Type::OUTPUT
}
- fn is_type_compatible(&self, t: &Type) -> bool {
- let Type { primitive, array } = t;
- if *array {
- return false;
- }
- match primitive {
- PrimitiveType::Output => true,
- _ => false,
- }
+ fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool {
+ return if let ParserTypeVariant::Output(_) = t.variant { true } else { false }
}
}

@@ -958,15 +949,8 @@ impl ValueImpl for MessageValue {
fn exact_type(&self) -> Type {
Type::MESSAGE
}
- fn is_type_compatible(&self, t: &Type) -> bool {
- let Type { primitive, array } = t;
- if *array {
- return false;
- }
- match primitive {
- PrimitiveType::Message => true,
- _ => false,
- }
+ fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool {
+ return if let ParserTypeVariant::Message = t.variant { true } else { false };
}
}

@@ -983,18 +967,11 @@ impl ValueImpl for BooleanValue {
fn exact_type(&self) -> Type {
Type::BOOLEAN
}
- fn is_type_compatible(&self, t: &Type) -> bool {
- let Type { primitive, array } = t;
- if *array {
- return false;
- }
- match primitive {
- PrimitiveType::Boolean => true,
- PrimitiveType::Byte => true,
- PrimitiveType::Short => true,
- PrimitiveType::Int => true,
- PrimitiveType::Long => true,
- _ => false,
+ fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool {
+ use ParserTypeVariant::*;
+ match t.variant {
+ Bool | Byte | Short | Int | Long => true,
+ _ => false
}
}
}

@@ -1012,17 +989,11 @@ impl ValueImpl for ByteValue {
fn exact_type(&self) -> Type {
Type::BYTE
}
- fn is_type_compatible(&self, t: &Type) -> bool {
- let Type { primitive, array } = t;
- if *array {
- return false;
- }
- match primitive {
- PrimitiveType::Byte => true,
- PrimitiveType::Short => true,
- PrimitiveType::Int => true,
- PrimitiveType::Long => true,
- _ => false,
+ fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool {
+ use ParserTypeVariant::*;
+ match t.variant {
+ Byte | Short | Int | Long => true,
+ _ => false
}
}
}

@@ -1040,16 +1011,11 @@ impl ValueImpl for ShortValue {
fn exact_type(&self) -> Type {
Type::SHORT
}
- fn 
is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if *array { - return false; - } - match primitive { - PrimitiveType::Short => true, - PrimitiveType::Int => true, - PrimitiveType::Long => true, - _ => false, + fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { + use ParserTypeVariant::*; + match t.variant { + Short | Int | Long => true, + _ => false } } } @@ -1067,15 +1033,11 @@ impl ValueImpl for IntValue { fn exact_type(&self) -> Type { Type::INT } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if *array { - return false; - } - match primitive { - PrimitiveType::Int => true, - PrimitiveType::Long => true, - _ => false, + fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { + use ParserTypeVariant::*; + match t.variant { + Int | Long => true, + _ => false } } } @@ -1093,15 +1055,15 @@ impl ValueImpl for LongValue { fn exact_type(&self) -> Type { Type::LONG } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if *array { - return false; - } - match primitive { - PrimitiveType::Long => true, - _ => false, - } + fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { + return if let ParserTypeVariant::Long = t.variant { true } else { false } + } +} + +fn get_array_inner(t: &ParserType) -> Option { + match t.variant { + ParserTypeVariant::Array(inner) => Some(inner), + _ => None } } @@ -1127,15 +1089,10 @@ impl ValueImpl for InputArrayValue { fn exact_type(&self) -> Type { Type::INPUT_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Input => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| InputValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1161,15 +1118,10 @@ impl ValueImpl for OutputArrayValue { fn exact_type(&self) -> Type { Type::OUTPUT_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Output => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| OutputValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1195,15 +1147,10 @@ impl ValueImpl for MessageArrayValue { fn exact_type(&self) -> Type { Type::MESSAGE_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Message => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| MessageValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1229,15 +1176,10 @@ impl ValueImpl for BooleanArrayValue { fn exact_type(&self) -> Type { Type::BOOLEAN_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Boolean => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| BooleanValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1263,15 +1205,10 @@ impl ValueImpl for ByteArrayValue { fn exact_type(&self) -> Type { Type::BYTE_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array 
} = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Byte => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| ByteValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1297,15 +1234,10 @@ impl ValueImpl for ShortArrayValue { fn exact_type(&self) -> Type { Type::SHORT_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Short => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| ShortValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1331,15 +1263,10 @@ impl ValueImpl for IntArrayValue { fn exact_type(&self) -> Type { Type::INT_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Int => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| IntValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1365,15 +1292,10 @@ impl ValueImpl for LongArrayValue { fn exact_type(&self) -> Type { Type::LONG_ARRAY } - fn is_type_compatible(&self, t: &Type) -> bool { - let Type { primitive, array } = t; - if !*array { - return false; - } - match primitive { - PrimitiveType::Long => true, - _ => false, - } + fn is_type_compatible_hack(h: &Heap, t: &ParserType) -> bool { + get_array_inner(t) + .map(|v| LongValue::is_type_compatible_hack(h, &h[v])) + .unwrap_or(false) } } @@ -1387,8 +1309,11 @@ impl Store { } fn initialize(&mut self, h: &Heap, var: VariableId, value: Value) { // Ensure value is compatible with type of variable - let the_type = h[var].the_type(h); - assert!(value.is_type_compatible(the_type)); + let parser_type = match &h[var] { + Variable::Local(v) => v.parser_type, + Variable::Parameter(v) => v.parser_type, + }; + assert!(value.is_type_compatible(h, &h[parser_type])); // Overwrite mapping self.map.insert(var, value.clone()); } @@ -1403,8 +1328,12 @@ impl Store { Expression::Variable(var) => { let var = var.declaration.unwrap(); // Ensure value is compatible with type of variable - let the_type = h[var].the_type(h); - assert!(value.is_type_compatible(the_type)); + let parser_type_id = match &h[var] { + Variable::Local(v) => v.parser_type, + Variable::Parameter(v) => v.parser_type + }; + let parser_type = &h[parser_type_id]; + assert!(value.is_type_compatible(h, parser_type)); // Overwrite mapping self.map.insert(var, value.clone()); Ok(value) @@ -1631,8 +1560,8 @@ impl Prompt { assert_eq!(params.len(), args.len()); for (param, value) in params.iter().zip(args.iter()) { let hparam = &h[*param]; - let type_annot = &h[hparam.type_annotation]; - assert!(value.is_type_compatible(&type_annot.the_type)); + let parser_type = &h[hparam.parser_type]; + assert!(value.is_type_compatible(h, parser_type)); self.store.initialize(h, param.upcast(), value.clone()); } } diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 1f9139daca0961a9871daf0ad625ca6bb45ff1f1..ca6752201592a4531edefc8629fa3256315e1765 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -156,6 +156,13 @@ impl Lexer<'_> { Ok(()) } } + fn consume_any_chars(&mut self) { + if !is_ident_start(self.source.next()) { return } + self.source.consume(); + while is_ident_rest(self.source.next()) { + self.source.consume() + } 
+ } fn has_keyword(&self, keyword: &[u8]) -> bool { if !self.source.has(keyword) { return false; @@ -367,9 +374,9 @@ impl Lexer<'_> { heap: &mut Heap, port_pos: &InputPosition, args: Vec, - | { + | -> Result { match args.len() { - 0 => Ok(h.alloc_parser_type(|this| ParserType{ + 0 => Ok(heap.alloc_parser_type(|this| ParserType{ this, pos: port_pos.clone(), variant: ParserTypeVariant::Inferred @@ -418,7 +425,7 @@ impl Lexer<'_> { self.consume_keyword(b"in"); let poly_args = self.consume_polymorphic_args(h, allow_inference)?; let poly_arg = reduce_port_poly_args(h, &pos, poly_args) - .map_err(|| ParseError2::new_error( + .map_err(|_| ParseError2::new_error( &self.source, pos, "'in' type only accepts up to 1 polymorphic argument" ))?; ParserTypeVariant::Input(poly_arg) @@ -426,7 +433,7 @@ impl Lexer<'_> { self.consume_keyword(b"out"); let poly_args = self.consume_polymorphic_args(h, allow_inference)?; let poly_arg = reduce_port_poly_args(h, &pos, poly_args) - .map_err(|| ParseError2::new_error( + .map_err(|_| ParseError2::new_error( &self.source, pos, "'out' type only accepts up to 1 polymorphic argument" ))?; ParserTypeVariant::Output(poly_arg) @@ -440,7 +447,7 @@ impl Lexer<'_> { // If the type was a basic type (not supporting polymorphic type // arguments), then we make sure the user did not specify any of them. let mut backup_pos = self.source.pos(); - if !parser_type.supports_polymorphic_args() { + if !parser_type_variant.supports_polymorphic_args() { self.consume_whitespace(false)?; if let Some(b'<') = self.source.next() { return Err(ParseError2::new_error( @@ -485,6 +492,72 @@ impl Lexer<'_> { Ok(parser_type_id) } + /// Consumes things that look like types. If everything seems to look like + /// a type then `true` will be returned and the input position will be + /// placed after the type. If it doesn't appear to be a type then `false` + /// will be returned. + /// TODO: @cleanup, this is not particularly pretty or robust, methinks + fn maybe_consume_type_spilled(&mut self) -> bool { + // Spilling polymorphic args. Don't care about the input position + fn maybe_consume_polymorphic_args(v: &mut Lexer) -> bool { + if v.consume_whitespace(false).is_err() { return false; } + if let Some(b'<') = v.source.next() { + v.source.consume(); + if v.consume_whitespace(false).is_err() { return false; } + loop { + if !maybe_consume_type_inner(v) { return false; } + if v.consume_whitespace(false).is_err() { return false; } + let has_comma = v.source.next() == Some(b','); + if has_comma { + v.source.consume(); + if v.consume_whitespace(false).is_err() { return false; } + } + if let Some(b'>') = v.source.next() { + v.source.consume(); + break; + } else if !has_comma { + return false; + } + } + } + return true; + } + + // Inner recursive type parser. 
This method simply advances the lexer + // and does not store the backup position in case parsing fails + fn maybe_consume_type_inner(v: &mut Lexer) -> bool { + // Consume type identifier and optional polymorphic args + if v.has_type_keyword() { + v.consume_any_chars() + } else { + let ident = v.consume_namespaced_identifier(); + if ident.is_err() { return false } + } + + if !maybe_consume_polymorphic_args(v) { return false; } + + // Check if wrapped in array + if v.consume_whitespace(false).is_err() { return false } + while let Some(b'[') = v.source.next() { + v.source.consume(); + if v.consume_whitespace(false).is_err() { return false; } + if Some(b']') != v.source.next() { return false; } + v.source.consume(); + } + + return true; + } + + let backup_pos = self.source.pos(); + if !maybe_consume_type_inner(self) { + // Not a type + self.source.seek(backup_pos); + return false; + } + + return true; + } + /// Consumes polymorphic arguments and its delimiters if specified. The /// input position may be at whitespace. If polyargs are present then the /// whitespace and the args are consumed and the input position will be @@ -582,85 +655,85 @@ impl Lexer<'_> { } } - fn consume_primitive_type(&mut self) -> Result { - if self.has_keyword(b"in") { - self.consume_keyword(b"in")?; - Ok(PrimitiveType::Input) - } else if self.has_keyword(b"out") { - self.consume_keyword(b"out")?; - Ok(PrimitiveType::Output) - } else if self.has_keyword(b"msg") { - self.consume_keyword(b"msg")?; - Ok(PrimitiveType::Message) - } else if self.has_keyword(b"boolean") { - self.consume_keyword(b"boolean")?; - Ok(PrimitiveType::Boolean) - } else if self.has_keyword(b"byte") { - self.consume_keyword(b"byte")?; - Ok(PrimitiveType::Byte) - } else if self.has_keyword(b"short") { - self.consume_keyword(b"short")?; - Ok(PrimitiveType::Short) - } else if self.has_keyword(b"int") { - self.consume_keyword(b"int")?; - Ok(PrimitiveType::Int) - } else if self.has_keyword(b"long") { - self.consume_keyword(b"long")?; - Ok(PrimitiveType::Long) - } else if self.has_keyword(b"auto") { - // TODO: @types - return Err(self.error_at_pos("inferred types using 'auto' are reserved, but not yet implemented")); - } else { - let identifier = self.consume_namespaced_identifier()?; - Ok(PrimitiveType::Symbolic(PrimitiveSymbolic{ - identifier, - definition: None - })) - } - } - fn has_array(&mut self) -> bool { - let backup_pos = self.source.pos(); - let mut result = false; - match self.consume_whitespace(false) { - Ok(_) => result = self.has_string(b"["), - Err(_) => {} - } - self.source.seek(backup_pos); - return result; - } - fn consume_type(&mut self) -> Result { - let primitive = self.consume_primitive_type()?; - let array; - if self.has_array() { - self.consume_string(b"[]")?; - array = true; - } else { - array = false; - } - Ok(Type { primitive, array }) - } - fn create_type_annotation_input(&self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let the_type = Type::INPUT; - let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); - Ok(id) - } - fn create_type_annotation_output(&self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let the_type = Type::OUTPUT; - let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); - Ok(id) - } - fn consume_type_annotation(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let the_type = self.consume_type()?; - let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); - 
Ok(id) - } - fn consume_type_annotation_spilled(&mut self) -> Result<(), ParseError2> { - self.consume_type()?; - Ok(()) - } + // fn consume_primitive_type(&mut self) -> Result { + // if self.has_keyword(b"in") { + // self.consume_keyword(b"in")?; + // Ok(PrimitiveType::Input) + // } else if self.has_keyword(b"out") { + // self.consume_keyword(b"out")?; + // Ok(PrimitiveType::Output) + // } else if self.has_keyword(b"msg") { + // self.consume_keyword(b"msg")?; + // Ok(PrimitiveType::Message) + // } else if self.has_keyword(b"boolean") { + // self.consume_keyword(b"boolean")?; + // Ok(PrimitiveType::Boolean) + // } else if self.has_keyword(b"byte") { + // self.consume_keyword(b"byte")?; + // Ok(PrimitiveType::Byte) + // } else if self.has_keyword(b"short") { + // self.consume_keyword(b"short")?; + // Ok(PrimitiveType::Short) + // } else if self.has_keyword(b"int") { + // self.consume_keyword(b"int")?; + // Ok(PrimitiveType::Int) + // } else if self.has_keyword(b"long") { + // self.consume_keyword(b"long")?; + // Ok(PrimitiveType::Long) + // } else if self.has_keyword(b"auto") { + // // TODO: @types + // return Err(self.error_at_pos("inferred types using 'auto' are reserved, but not yet implemented")); + // } else { + // let identifier = self.consume_namespaced_identifier()?; + // Ok(PrimitiveType::Symbolic(PrimitiveSymbolic{ + // identifier, + // definition: None + // })) + // } + // } + // fn has_array(&mut self) -> bool { + // let backup_pos = self.source.pos(); + // let mut result = false; + // match self.consume_whitespace(false) { + // Ok(_) => result = self.has_string(b"["), + // Err(_) => {} + // } + // self.source.seek(backup_pos); + // return result; + // } + // fn consume_type(&mut self) -> Result { + // let primitive = self.consume_primitive_type()?; + // let array; + // if self.has_array() { + // self.consume_string(b"[]")?; + // array = true; + // } else { + // array = false; + // } + // Ok(Type { primitive, array }) + // } + // fn create_type_annotation_input(&self, h: &mut Heap) -> Result { + // let position = self.source.pos(); + // let the_type = Type::INPUT; + // let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); + // Ok(id) + // } + // fn create_type_annotation_output(&self, h: &mut Heap) -> Result { + // let position = self.source.pos(); + // let the_type = Type::OUTPUT; + // let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); + // Ok(id) + // } + // fn consume_type_annotation(&mut self, h: &mut Heap) -> Result { + // let position = self.source.pos(); + // let the_type = self.consume_type()?; + // let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); + // Ok(id) + // } + // fn consume_type_annotation_spilled(&mut self) -> Result<(), ParseError2> { + // self.consume_type()?; + // Ok(()) + // } // Parameters @@ -1528,11 +1601,13 @@ impl Lexer<'_> { } let backup_pos = self.source.pos(); let mut result = false; - if let Ok(_) = self.consume_type_annotation_spilled() { - if let Ok(_) = self.consume_whitespace(false) { + if self.maybe_consume_type_spilled() { + // We seem to have a valid type, do we now have an identifier? + if self.consume_whitespace(false).is_ok() { result = self.has_identifier(); } } + self.source.seek(backup_pos); return result; } @@ -1607,7 +1682,7 @@ impl Lexer<'_> { // Consume the input port // TODO: Unsure about this, both ports refer to the same ParserType, is this ok? 
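The statement lookahead above (`maybe_consume_type_spilled` together with the `backup_pos`/`seek` dance) follows a speculative-consume-then-rewind pattern. A self-contained sketch of that pattern with illustrative types, not the lexer's own API:

```rust
// Remember the cursor, speculatively consume, then seek back so the real
// parse starts fresh -- mirroring self.source.pos()/seek() above.
struct Source<'a> { bytes: &'a [u8], pos: usize }

impl<'a> Source<'a> {
    fn next(&self) -> Option<u8> { self.bytes.get(self.pos).copied() }
    fn consume(&mut self) { self.pos += 1; }
}

/// Returns true if an identifier followed by '<' starts here, without moving
/// the cursor as far as the caller is concerned.
fn looks_like_polymorphic_type(src: &mut Source) -> bool {
    let backup_pos = src.pos;
    let mut saw_ident = false;
    while matches!(src.next(), Some(b) if b.is_ascii_alphanumeric() || b == b'_') {
        saw_ident = true;
        src.consume();
    }
    let result = saw_ident && src.next() == Some(b'<');
    src.pos = backup_pos; // always restore; the caller re-parses for real
    result
}
```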
let in_parser_type = h.alloc_parser_type(|this| ParserType{ - this, pos: position.clone(), variant: ParserTypeVariant::Output(poly_arg_id) + this, pos: position.clone(), variant: ParserTypeVariant::Input(poly_arg_id) }); let in_identifier = self.consume_identifier()?; self.consume_whitespace(false)?; @@ -2089,7 +2164,7 @@ impl Lexer<'_> { fn consume_function_definition(&mut self, h: &mut Heap) -> Result { // Consume return type, optional polyvars and identifier let position = self.source.pos(); - let return_type = self.consume_type_annotation(h)?; + let return_type = self.consume_type2(h, false)?; self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; let poly_vars = self.consume_polymorphic_vars()?; @@ -2345,6 +2420,28 @@ mod tests { use crate::protocol::lexer::*; use crate::protocol::inputsource::*; + #[derive(Debug, Eq, PartialEq)] + enum ParserTypeClass { + Message, Bool, Byte, Short, Int, Long, String, Array, Nope + } + impl ParserTypeClass { + fn from(v: &ParserType) -> ParserTypeClass { + use ParserTypeVariant as PTV; + use ParserTypeClass as PTC; + match &v.variant { + PTV::Message => PTC::Message, + PTV::Bool => PTC::Bool, + PTV::Byte => PTC::Byte, + PTV::Short => PTC::Short, + PTV::Int => PTC::Int, + PTV::Long => PTC::Long, + PTV::String => PTC::String, + PTV::Array(_) => PTC::Array, + _ => PTC::Nope, + } + } + } + #[test] fn test_pragmas() { let mut h = Heap::new(); @@ -2449,41 +2546,41 @@ mod tests { assert_eq!(root.definitions.len(), 2); - let symbolic_type = |v: &PrimitiveType| -> Vec { - if let PrimitiveType::Symbolic(v) = v { - v.identifier.value.clone() - } else { - assert!(false); - unreachable!(); - } - }; + // let symbolic_type = |v: &PrimitiveType| -> Vec { + // if let PrimitiveType::Symbolic(v) = v { + // v.identifier.value.clone() + // } else { + // assert!(false); + // unreachable!(); + // } + // }; let foo_def = h[root.definitions[0]].as_struct(); assert_eq!(foo_def.identifier.value, b"Foo"); assert_eq!(foo_def.fields.len(), 3); assert_eq!(foo_def.fields[0].field.value, b"one"); - assert_eq!(h[foo_def.fields[0].the_type].the_type, Type::BYTE); + assert_eq!(ParserTypeClass::from(&h[foo_def.fields[0].parser_type]), ParserTypeClass::Byte); assert_eq!(foo_def.fields[1].field.value, b"two"); - assert_eq!(h[foo_def.fields[1].the_type].the_type, Type::SHORT); + assert_eq!(ParserTypeClass::from(&h[foo_def.fields[1].parser_type]), ParserTypeClass::Short); assert_eq!(foo_def.fields[2].field.value, b"three"); - assert_eq!( - symbolic_type(&h[foo_def.fields[2].the_type].the_type.primitive), - Vec::from("Bar".as_bytes()) - ); + // assert_eq!( + // symbolic_type(&h[foo_def.fields[2].the_type].the_type.primitive), + // Vec::from("Bar".as_bytes()) + // ); let bar_def = h[root.definitions[1]].as_struct(); assert_eq!(bar_def.identifier.value, b"Bar"); assert_eq!(bar_def.fields.len(), 3); assert_eq!(bar_def.fields[0].field.value, b"one"); - assert_eq!(h[bar_def.fields[0].the_type].the_type, Type::INT_ARRAY); + assert_eq!(ParserTypeClass::from(&h[bar_def.fields[0].parser_type]), ParserTypeClass::Array); assert_eq!(bar_def.fields[1].field.value, b"two"); - assert_eq!(h[bar_def.fields[1].the_type].the_type, Type::INT_ARRAY); + assert_eq!(ParserTypeClass::from(&h[bar_def.fields[1].parser_type]), ParserTypeClass::Array); assert_eq!(bar_def.fields[2].field.value, b"three"); - assert_eq!(h[bar_def.fields[2].the_type].the_type.array, true); - assert_eq!( - symbolic_type(&h[bar_def.fields[2].the_type].the_type.primitive), - Vec::from("Qux".as_bytes()) - ); + 
assert_eq!(ParserTypeClass::from(&h[bar_def.fields[2].parser_type]), ParserTypeClass::Array); + // assert_eq!( + // symbolic_type(&h[bar_def.fields[2].parser_type].the_type.primitive), + // Vec::from("Qux".as_bytes()) + // ); } #[test] @@ -2529,7 +2626,7 @@ mod tests { assert_eq!(bar_def.variants[2].value, EnumVariantValue::None); let qux_def = h[root.definitions[2]].as_enum(); - let enum_type = |value: &EnumVariantValue| -> &TypeAnnotation { + let enum_type = |value: &EnumVariantValue| -> &ParserType { if let EnumVariantValue::Type(t) = value { &h[*t] } else { @@ -2540,15 +2637,15 @@ mod tests { assert_eq!(qux_def.identifier.value, b"Qux"); assert_eq!(qux_def.variants.len(), 3); assert_eq!(qux_def.variants[0].identifier.value, b"A"); - assert_eq!(enum_type(&qux_def.variants[0].value).the_type, Type::BYTE_ARRAY); + assert_eq!(ParserTypeClass::from(enum_type(&qux_def.variants[0].value)), ParserTypeClass::Array); assert_eq!(qux_def.variants[1].identifier.value, b"B"); - assert_eq!(enum_type(&qux_def.variants[1].value).the_type.array, true); - if let PrimitiveType::Symbolic(t) = &enum_type(&qux_def.variants[1].value).the_type.primitive { - assert_eq!(t.identifier.value, Vec::from("Bar".as_bytes())); - } else { assert!(false) } + assert_eq!(ParserTypeClass::from(enum_type(&qux_def.variants[1].value)), ParserTypeClass::Array); + // if let PrimitiveType::Symbolic(t) = &enum_type(&qux_def.variants[1].value).the_type.primitive { + // assert_eq!(t.identifier.value, Vec::from("Bar".as_bytes())); + // } else { assert!(false) } assert_eq!(qux_def.variants[2].identifier.value, b"C"); - assert_eq!(enum_type(&qux_def.variants[2].value).the_type, Type::BYTE); + assert_eq!(ParserTypeClass::from(enum_type(&qux_def.variants[2].value)), ParserTypeClass::Byte); } // #[test] diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index 4035bfc1246afdca8ccc6ba33705861876524214..9ea716fb06b139f56996a305106369224b576835 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -83,12 +83,10 @@ impl ProtocolDescription { } for ¶m in def.parameters().iter() { let param = &h[param]; - let type_annot = &h[param.type_annotation]; - if type_annot.the_type.array { - return Err(NonPortTypeParameters); - } - match type_annot.the_type.primitive { - PrimitiveType::Input | PrimitiveType::Output => continue, + let parser_type = &h[param.parser_type]; + + match parser_type.variant { + ParserTypeVariant::Input(_) | ParserTypeVariant::Output(_) => continue, _ => { return Err(NonPortTypeParameters); } @@ -97,11 +95,11 @@ impl ProtocolDescription { let mut result = Vec::new(); for ¶m in def.parameters().iter() { let param = &h[param]; - let type_annot = &h[param.type_annotation]; - let ptype = &type_annot.the_type.primitive; - if ptype == &PrimitiveType::Input { + let parser_type = &h[param.parser_type]; + + if let ParserTypeVariant::Input(_) = parser_type.variant { result.push(Polarity::Getter) - } else if ptype == &PrimitiveType::Output { + } else if let ParserTypeVariant::Output(_) = parser_type.variant { result.push(Polarity::Putter) } else { unreachable!() diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index e344609f72ad651c643aba0b48b9652c3f08a4ae..0dfa83adc790cb5ca77def228b9686f717e8d5c0 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -900,7 +900,7 @@ impl Visitor for LinkStatements { // Update the while's next statement to point to the pseudo-statement let end_while_id = h[stmt].end_while; assert!(end_while_id.is_some()); - let 
end_while_id = end_while_id.unwrap(); + // let end_while_id = end_while_id.unwrap(); assert!(self.prev.is_none()); self.visit_statement(h, h[stmt].body)?; diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 6ea199137f075936102043401f84da271468e207..f4d25bd932e5aaa4234cbbeb06b6ca04031cb1de 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,7 +1,7 @@ mod depth_visitor; mod symbol_table; +// mod type_table_old; mod type_table; -mod type_table2; mod type_resolver; mod visitor; mod visitor_linker; @@ -10,7 +10,7 @@ use depth_visitor::*; use symbol_table::SymbolTable; use visitor::Visitor2; use visitor_linker::ValidityAndLinkerVisitor; -use type_table::TypeTable; +use type_table::{TypeTable, TypeCtx}; use crate::protocol::ast::*; use crate::protocol::inputsource::*; @@ -188,7 +188,8 @@ impl Parser { // All imports in the AST are now annotated. We now use the symbol table // to construct the type table. - let type_table = TypeTable::new(&symbol_table, &self.heap, &self.modules)?; + let type_ctx = TypeCtx::new(&symbol_table, &self.heap, &self.modules); + let type_table = TypeTable::new(&type_ctx)?; Ok((symbol_table, type_table)) } @@ -214,9 +215,9 @@ impl Parser { return Err(ParseError2::new_error(&self.modules[0].source, position, &message)) } - // let mut writer = ASTWriter::new(); - // let mut file = std::fs::File::create(std::path::Path::new("ast.txt")).unwrap(); - // writer.write_ast(&mut file, &self.heap); + let mut writer = ASTWriter::new(); + let mut file = std::fs::File::create(std::path::Path::new("ast.txt")).unwrap(); + writer.write_ast(&mut file, &self.heap); Ok(root_id) } diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index 7785670279273b406be984c3421516d1793d3a5a..f7dd586908f6c00ca4d8a62d9b4af19472e61536 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -1,11 +1,67 @@ -// TODO: @fix PrimitiveType for enums/unions +/** +TypeTable + +Contains the type table: a datastructure that, when compilation succeeds, +contains a concrete type definition for each AST type definition. In general +terms the type table will go through the following phases during the compilation +process: + + 1. The base type definitions are resolved after the parser phase has + finished. This implies that the AST is fully constructed, but not yet + annotated. + 2. With the base type definitions resolved, the validation/linker phase will + use the type table (together with the symbol table) to disambiguate + terms (e.g. does an expression refer to a variable, an enum, a constant, + etc.) + 3. During the type checking/inference phase the type table is used to ensure + that the AST contains valid use of types in expressions and statements. + At the same time type inference will find concrete instantiations of + polymorphic types, these will be stored in the type table as monomorphed + instantiations of a generic type. + 4. After type checking and inference (and possibly when constructing byte + code) the type table will construct a type graph and solidify each + non-polymorphic type and monomorphed instantiations of polymorphic types + into concrete types. + +So a base type is defined by its (optionally polymorphic) representation in the +AST. A concrete type has concrete types for each of the polymorphic arguments. A +struct, enum or union may have polymorphic arguments but not actually be a +polymorphic type. 
This happens when the polymorphic arguments are not used in +the type definition itself. Similarly for functions/components: but here we just +check the arguments/return type of the signature. + +Apart from base types and concrete types, we also use the term "embedded type" +for types that are embedded within another type, such as a type of a struct +struct field or of a union variant. Embedded types may themselves have +polymorphic arguments and therefore form an embedded type tree. + +NOTE: for now a polymorphic definition of a function/component is illegal if the + polymorphic arguments are not used in the arguments/return type. It should + be legal, but we disallow it for now. + +TODO: Allow potentially cyclic datatypes and reject truly cyclic datatypes. +TODO: Allow for the full potential of polymorphism +TODO: Detect "true" polymorphism: for datatypes like structs/enum/unions this + is simple. For functions we need to check the entire body. Do it here? Or + do it somewhere else? +TODO: Do we want to check fn argument collision here, or in validation phase? +TODO: Make type table an on-demand thing instead of constructing all base types. +TODO: Cleanup everything, feels like a lot can be written cleaner and with less + assumptions on each function call. +// TODO: Review all comments +*/ + +use std::fmt::{Formatter, Result as FmtResult}; +use std::collections::{HashMap, VecDeque}; + use crate::protocol::ast::*; use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; use crate::protocol::inputsource::*; use crate::protocol::parser::*; -use std::collections::HashMap; -use crate::common::Formatter; +//------------------------------------------------------------------------------ +// Defined Types +//------------------------------------------------------------------------------ #[derive(Copy, Clone, PartialEq, Eq)] pub enum TypeClass { @@ -29,12 +85,28 @@ impl TypeClass { } impl std::fmt::Display for TypeClass { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "{}", self.display_name()) } } -pub enum DefinedType { +/// Struct wrapping around a potentially polymorphic type. If the type does not +/// have any polymorphic arguments then it will not have any monomorphs and +/// `is_polymorph` will be set to `false`. A type with polymorphic arguments +/// only has `is_polymorph` set to `true` if the polymorphic arguments actually +/// appear in the types associated types (function return argument, struct +/// field, enum variant, etc.). Otherwise the polymorphic argument is just a +/// marker and does not influence the bytesize of the type. +pub struct DefinedType { + pub(crate) ast_definition: DefinitionId, + pub(crate) definition: DefinedTypeVariant, + pub(crate) poly_args: Vec, + pub(crate) is_polymorph: bool, + pub(crate) is_pointerlike: bool, + pub(crate) monomorphs: Vec, // TODO: ? 
+} + +pub enum DefinedTypeVariant { Enum(EnumType), Union(UnionType), Struct(StructType), @@ -42,24 +114,25 @@ pub enum DefinedType { Component(ComponentType) } -impl DefinedType { +pub struct PolyArg { + identifier: Identifier, + /// Whether the polymorphic argument is used directly in the definition of + /// the type (not including bodies of function/component types) + is_in_use: bool, +} + +impl DefinedTypeVariant { pub(crate) fn type_class(&self) -> TypeClass { match self { - DefinedType::Enum(_) => TypeClass::Enum, - DefinedType::Union(_) => TypeClass::Union, - DefinedType::Struct(_) => TypeClass::Struct, - DefinedType::Function(_) => TypeClass::Function, - DefinedType::Component(_) => TypeClass::Component + DefinedTypeVariant::Enum(_) => TypeClass::Enum, + DefinedTypeVariant::Union(_) => TypeClass::Union, + DefinedTypeVariant::Struct(_) => TypeClass::Struct, + DefinedTypeVariant::Function(_) => TypeClass::Function, + DefinedTypeVariant::Component(_) => TypeClass::Component } } } -// TODO: Also support maximum u64 value -pub struct EnumVariant { - identifier: Identifier, - value: i64, -} - /// `EnumType` is the classical C/C++ enum type. It has various variants with /// an assigned integer value. The integer values may be user-defined, /// compiler-defined, or a mix of the two. If a user assigns the same enum @@ -70,10 +143,10 @@ pub struct EnumType { representation: PrimitiveType, } -pub struct UnionVariant { +// TODO: Also support maximum u64 value +pub struct EnumVariant { identifier: Identifier, - embedded_type: Option, - tag_value: i64, + value: i64, } /// `UnionType` is the algebraic datatype (or sum type, or discriminated union). @@ -84,22 +157,23 @@ pub struct UnionType { tag_representation: PrimitiveType } -pub struct StructField { +pub struct UnionVariant { identifier: Identifier, - field_type: TypeAnnotationId, + parser_type: Option, + tag_value: i64, } pub struct StructType { fields: Vec, } -pub struct FunctionArgument { +pub struct StructField { identifier: Identifier, - argument_type: TypeAnnotationId, + parser_type: ParserTypeId, } pub struct FunctionType { - return_type: TypeAnnotationId, + return_type: ParserTypeId, arguments: Vec } @@ -108,590 +182,957 @@ pub struct ComponentType { arguments: Vec } -pub struct TypeTable { - lookup: HashMap, +pub struct FunctionArgument { + identifier: Identifier, + parser_type: ParserTypeId, +} + +//------------------------------------------------------------------------------ +// Type table +//------------------------------------------------------------------------------ + +// TODO: @cleanup Do I really need this, doesn't make the code that much cleaner +struct TypeIterator { + breadcrumbs: Vec<(RootId, DefinitionId)> } -enum LookupResult { +impl TypeIterator { + fn new() -> Self { + Self{ breadcrumbs: Vec::with_capacity(32) } + } + + fn reset(&mut self, root_id: RootId, definition_id: DefinitionId) { + self.breadcrumbs.clear(); + self.breadcrumbs.push((root_id, definition_id)) + } + + fn push(&mut self, root_id: RootId, definition_id: DefinitionId) { + self.breadcrumbs.push((root_id, definition_id)); + } + + fn contains(&self, root_id: RootId, definition_id: DefinitionId) -> bool { + for (stored_root_id, stored_definition_id) in self.breadcrumbs.iter() { + if *stored_root_id == root_id && *stored_definition_id == definition_id { return true; } + } + + return false + } + + fn top(&self) -> Option<(RootId, DefinitionId)> { + self.breadcrumbs.last().map(|(r, d)| (*r, *d)) + } + + fn pop(&mut self) { + 
debug_assert!(!self.breadcrumbs.is_empty()); + self.breadcrumbs.pop(); + } +} + +#[derive(PartialEq, Eq)] +enum SpecifiedTypeVariant { + // No subtypes + Message, + Bool, + Byte, + Short, + Int, + Long, + String, + // Always one subtype + ArrayOf, + InputOf, + OutputOf, + // Variable number of subtypes, depending on the polymorphic arguments on + // the definition + InstanceOf(DefinitionId, usize) +} + +// #[derive(Eq)] +// struct SpecifiedType { +// /// Definition ID, may not be enough as the type may be polymorphic +// definition: DefinitionId, +// /// The polymorphic types for the definition. These are encoded in a list, +// /// which we interpret as the depth-first serialization of the type tree. +// poly_vars: Vec +// } +// +// impl PartialEq for SpecifiedType { +// fn eq(&self, other: &Self) -> bool { +// // Should point to same definition and have the same polyvars +// if self.definition.index != other.definition.index { return false; } +// if self.poly_vars.len() != other.poly_vars.len() { return false; } +// for (my_var, other_var) in self.poly_vars.iter().zip(other.poly_vars.iter()) { +// if my_var != other_var { return false; } +// } +// +// return true +// } +// } +// +// impl SpecifiedType { +// fn new_non_polymorph(definition: DefinitionId) -> Self { +// Self{ definition, poly_vars: Vec::new() } +// } +// +// fn new_polymorph(definition: DefinitionId, heap: &Heap, parser_type_id: ParserTypeId) -> Self { +// // Serialize into concrete types +// let mut poly_vars = Vec::new(); +// Self::construct_poly_vars(&mut poly_vars, heap, parser_type_id); +// Self{ definition, poly_vars } +// } +// +// fn construct_poly_vars(poly_vars: &mut Vec, heap: &Heap, parser_type_id: ParserTypeId) { +// // Depth-first construction of poly vars +// let parser_type = &heap[parser_type_id]; +// match &parser_type.variant { +// ParserTypeVariant::Message => { poly_vars.push(SpecifiedTypeVariant::Message); }, +// ParserTypeVariant::Bool => { poly_vars.push(SpecifiedTypeVariant::Bool); }, +// ParserTypeVariant::Byte => { poly_vars.push(SpecifiedTypeVariant::Byte); }, +// ParserTypeVariant::Short => { poly_vars.push(SpecifiedTypeVariant::Short); }, +// ParserTypeVariant::Int => { poly_vars.push(SpecifiedTypeVariant::Int); }, +// ParserTypeVariant::Long => { poly_vars.push(SpecifiedTypeVariant::Long); }, +// ParserTypeVariant::String => { poly_vars.push(SpecifiedTypeVariant::String); }, +// ParserTypeVariant::Array(subtype_id) => { +// poly_vars.push(SpecifiedTypeVariant::ArrayOf); +// Self::construct_poly_vars(poly_vars, heap, *subtype_id); +// }, +// ParserTypeVariant::Input(subtype_id) => { +// poly_vars.push(SpecifiedTypeVariant::InputOf); +// Self::construct_poly_vars(poly_vars, heap, *subtype_id); +// }, +// ParserTypeVariant::Output(subtype_id) => { +// poly_vars.push(SpecifiedTypeVariant::OutputOf); +// Self::construct_poly_vars(poly_vars, heap, *subtype_id); +// }, +// ParserTypeVariant::Symbolic(symbolic) => { +// let definition_id = match symbolic.variant { +// SymbolicParserTypeVariant::Definition(definition_id) => definition_id, +// SymbolicParserTypeVariant::PolyArg(_) => { +// // When construct entries in the type table, we no longer allow the +// // unspecified types in the AST, we expect them to be fully inferred. +// debug_assert!(false, "Encountered 'PolyArg' symbolic type. 
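A hypothetical walk (the ids are placeholders) showing how the `TypeIterator` breadcrumbs are meant to be driven while resolving nested embedded types, written as if it lived inside this module:

```rust
// Dependencies are pushed on top, `contains` guards against cycles, and
// `pop` returns control to the dependent definition.
fn example_walk(iter: &mut TypeIterator, root: RootId, foo: DefinitionId, bar: DefinitionId) {
    iter.reset(root, foo);                     // start resolving Foo
    iter.push(root, bar);                      // Foo embeds Bar: resolve Bar first
    assert!(iter.contains(root, foo));         // Bar embedding Foo would be a cycle
    assert!(iter.top() == Some((root, bar)));  // Bar is the current work item
    iter.pop();                                // Bar resolved, Foo is on top again
}
```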
Expected fully inferred types"); +// unreachable!(); +// } +// }; +// +// poly_vars.push(SpecifiedTypeVariant::InstanceOf(definition_id, symbolic.poly_args.len())); +// for subtype_id in &symbolic.poly_args { +// Self::construct_poly_vars(poly_vars, heap, *subtype_id); +// } +// }, +// ParserTypeVariant::IntegerLiteral => { +// debug_assert!(false, "Encountered 'IntegerLiteral' symbolic type. Expected fully inferred types"); +// unreachable!(); +// }, +// ParserTypeVariant::Inferred => { +// debug_assert!(false, "Encountered 'Inferred' symbolic type. Expected fully inferred types"); +// unreachable!(); +// } +// } +// } +// } + +enum ResolveResult { BuiltIn, + PolyArg, Resolved((RootId, DefinitionId)), - Unresolved((RootId, DefinitionId)), - Error((InputPosition, String)), + Unresolved((RootId, DefinitionId)) +} + +pub(crate) struct TypeTable { + /// Lookup from AST DefinitionId to a defined type. Considering possible + /// polymorphs is done inside the `DefinedType` struct. + lookup: HashMap, + /// Iterator over `(module, definition)` tuples used as workspace to make sure + /// that each base definition of all a type's subtypes are resolved. + iter: TypeIterator, + /// Iterator over `parser type`s during the process where `parser types` are + /// resolved into a `(module, definition)` tuple. + parser_type_iter: VecDeque, +} + +pub(crate) struct TypeCtx<'a> { + symbols: &'a SymbolTable, + heap: &'a Heap, + modules: &'a [LexedModule] +} + +impl<'a> TypeCtx<'a> { + pub(crate) fn new(symbols: &'a SymbolTable, heap: &'a Heap, modules: &'a [LexedModule]) -> Self { + Self{ symbols, heap, modules } + } } -/// `TypeTable` is responsible for walking the entire lexed AST and laying out -/// the various user-defined types in terms of bytes and offsets. This process -/// may be pseudo-recursive (meaning: it is implemented in a recursive fashion, -/// but not in the recursive-function-call kind of way) in case a type depends -/// on another type to be resolved. -/// TODO: Distinction between resolved types and unresolved types (regarding the -/// mixed use of BuiltIns and resolved types) is a bit yucky at the moment, -/// will have to come up with something nice in the future. -/// TODO: Need to factor out the repeated lookup in some kind of state machine. impl TypeTable { - pub(crate) fn new( - symbols: &SymbolTable, heap: &Heap, modules: &[LexedModule] - ) -> Result { + /// Construct a new type table without any resolved types. Types will be + /// resolved on-demand. + pub(crate) fn new(ctx: &TypeCtx) -> Result { + // Make sure we're allowed to cast root_id to index into ctx.modules if cfg!(debug_assertions) { - for (index, module) in modules.iter().enumerate() { - debug_assert_eq!(index, module.root_id.index as usize) + for (index, module) in ctx.modules.iter().enumerate() { + debug_assert_eq!(index, module.root_id.index as usize); } } - // Estimate total number of definitions we will encounter - let num_definitions = heap.definitions.len(); - let mut table = TypeTable{ - lookup: HashMap::with_capacity(num_definitions), + // Use context to guess hashmap size + let reserve_size = ctx.heap.definitions.len(); + let mut table = Self{ + lookup: HashMap::with_capacity(reserve_size), + iter: TypeIterator::new(), + parser_type_iter: VecDeque::with_capacity(64), }; - // Perform the breadcrumb-based type parsing. For now we do not allow - // cyclic types. - // TODO: Allow cyclic types. 
However, we want to have an implementation - // that is somewhat efficient: if a type is cyclic, then we have to - // insert a pointer somewhere. However, we don't want to insert them - // everywhere, for each possible type. Without any further context the - // decision to place a pointer somewhere is "random", we need to know - // how the type is used to have an informed opinion on where to place - // the pointer. - enum Breadcrumb { - Linear((usize, usize)), - Jumping((RootId, DefinitionId)) + for root in ctx.heap.protocol_descriptions.iter() { + for definition_id in &root.definitions { + table.resolve_base_definition(ctx, *definition_id)?; + } } - // Helper to handle the return value from lookup_type_definition. If - // resolved/builtin we return `true`, if an error we complete the error - // and return it. If unresolved then we check for cyclic types and - // return an error if cyclic, otherwise return false (indicating we need - // to continue in the breadcrumb-based resolving loop) - let handle_lookup_result = | - heap: &Heap, all_modules: &[LexedModule], cur_module: &LexedModule, - breadcrumbs: &mut Vec, result: LookupResult - | -> Result { - return match result { - LookupResult::BuiltIn | LookupResult::Resolved(_) => Ok(true), - LookupResult::Unresolved((new_root_id, new_definition_id)) => { - // Check for cyclic dependencies - let mut is_cyclic = false; - for breadcrumb in breadcrumbs.iter() { - let (root_id, definition_id) = match breadcrumb { - Breadcrumb::Linear((root_index, definition_index)) => { - let root_id = all_modules[*root_index].root_id; - let definition_id = heap[root_id].definitions[*definition_index]; - (root_id, definition_id) - }, - Breadcrumb::Jumping(root_id_and_definition_id) => *root_id_and_definition_id - }; + debug_assert_eq!(table.lookup.len(), reserve_size, "mismatch in reserved size of type table"); - if root_id == new_root_id && definition_id == new_definition_id { - // Oh noes! - is_cyclic = true; - break; - } + Ok(table) + } + + /// Retrieves base definition from type table. We must be able to retrieve + /// it as we resolve all base types upon type table construction (for now). + /// However, in the future we might do on-demand type resolving, so return + /// an option anyway + pub(crate) fn get_base_definition(&self, definition_id: &DefinitionId) -> Option<&DefinedType> { + self.lookup.get(&definition_id) + } + + /// This function will resolve just the basic definition of the type, it + /// will not handle any of the monomorphized instances of the type. 
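Putting the pieces together, a sketch of how the table is built and queried from within the crate; it mirrors the call sites in `parser/mod.rs` above and assumes the error type is `ParseError2`, as those call sites suggest:

```rust
// Build the type table from the post-parse context, then query a base type.
fn example_usage(symbols: &SymbolTable, heap: &Heap, modules: &[LexedModule],
                 definition_id: DefinitionId) -> Result<(), ParseError2> {
    let ctx = TypeCtx::new(symbols, heap, modules);
    let table = TypeTable::new(&ctx)?;

    // Every AST definition now has a resolved base definition.
    if let Some(defined) = table.get_base_definition(&definition_id) {
        match defined.definition.type_class() {
            TypeClass::Function | TypeClass::Component => { /* signature-like type */ }
            TypeClass::Enum | TypeClass::Union | TypeClass::Struct => { /* data type */ }
        }
    }
    Ok(())
}
```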
+    fn resolve_base_definition<'a>(&'a mut self, ctx: &TypeCtx, definition_id: DefinitionId) -> Result<(), ParseError2> {
+        // Check if we have already resolved the base definition
+        if self.lookup.contains_key(&definition_id) { return Ok(()); }
+
+        let root_id = Self::find_root_id(ctx, definition_id);
+        self.iter.reset(root_id, definition_id);
+
+        while let Some((root_id, definition_id)) = self.iter.top() {
+            // We have a type to resolve
+            let definition = &ctx.heap[definition_id];
+
+            let can_pop_breadcrumb = match definition {
+                Definition::Enum(definition) => self.resolve_base_enum_definition(ctx, root_id, definition),
+                Definition::Struct(definition) => self.resolve_base_struct_definition(ctx, root_id, definition),
+                Definition::Component(definition) => self.resolve_base_component_definition(ctx, root_id, definition),
+                Definition::Function(definition) => self.resolve_base_function_definition(ctx, root_id, definition),
+            }?;
+
+            // If we cannot pop the breadcrumb then `ingest_resolve_result` has
+            // pushed a new breadcrumb that we must follow before we can resolve
+            // the current type.
+            if can_pop_breadcrumb {
+                self.iter.pop();
+            }
+        }
+
+        // We must have resolved the type
+        debug_assert!(self.lookup.contains_key(&definition_id), "base type not resolved");
+        Ok(())
+    }
+
+    /// Resolve the basic enum definition to an entry in the type table. It will
+    /// not instantiate any monomorphized instances of polymorphic enum
+    /// definitions. If a subtype has to be resolved first then this function
+    /// will return `false` after calling `ingest_resolve_result`.
+    fn resolve_base_enum_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &EnumDefinition) -> Result<bool, ParseError2> {
+        debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base enum already resolved");
+
+        // Check if the enum should be implemented as a classic enumeration or
+        // a tagged union. Keep track of variant index for error messages. Make
+        // sure all embedded types are resolved.
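+        // In other words: a single `EnumVariantValue::Type(..)` variant forces
+        // the tagged-union representation constructed below, while variants that
+        // are only `None` or `Integer(..)` keep the classic enum representation.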
+ let mut first_tag_value = None; + let mut first_int_value = None; + for variant in &definition.variants { + match &variant.value { + EnumVariantValue::None => {}, + EnumVariantValue::Integer(_) => if first_int_value.is_none() { + first_int_value = Some(variant.position); + }, + EnumVariantValue::Type(variant_type_id) => { + if first_tag_value.is_none() { + first_tag_value = Some(variant.position); } - if is_cyclic { - let mut error = ParseError2::new_error( - &all_modules[new_root_id.index as usize].source, - heap[new_definition_id].position(), - "Evaluating this definition results in a a cyclic dependency" - ); - for (index, breadcrumb) in breadcrumbs.iter().enumerate() { - match breadcrumb { - Breadcrumb::Linear((root_index, definition_index)) => { - debug_assert_eq!(index, 0); - error = error.with_postfixed_info( - &all_modules[*root_index].source, - heap[heap[all_modules[*root_index].root_id].definitions[*definition_index]].position(), - "The cycle started with this definition" - ) - }, - Breadcrumb::Jumping((root_id, definition_id)) => { - debug_assert!(index > 0); - error = error.with_postfixed_info( - &all_modules[root_id.index as usize].source, - heap[*definition_id].position(), - "Which depends on this definition" - ) - } - } - } - Err(error) - } else { - breadcrumbs.push(Breadcrumb::Jumping((new_root_id, new_definition_id))); - Ok(false) + // Check if the embedded type needs to be resolved + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *variant_type_id)?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) } - }, - LookupResult::Error((position, message)) => { - Err(ParseError2::new_error(&cur_module.source, position, &message)) } } - }; + } - let mut module_index = 0; - let mut definition_index = 0; - let mut breadcrumbs = Vec::with_capacity(32); // if a user exceeds this, the user sucks at programming - while module_index < modules.len() { - // Go to next module if needed - { - let root = &heap[modules[module_index].root_id]; - if definition_index >= root.definitions.len() { - module_index += 1; - definition_index = 0; - continue; - } - } + if first_tag_value.is_some() && first_int_value.is_some() { + // Not illegal, but useless and probably a programmer mistake + let module_source = &ctx.modules[root_id.index as usize].source; + let tag_pos = first_tag_value.unwrap(); + let int_pos = first_int_value.unwrap(); + return Err( + ParseError2::new_error( + module_source, definition.position, + "Illegal combination of enum integer variant(s) and enum union variant(s)" + ) + .with_postfixed_info(module_source, int_pos, "Assigning an integer value here") + .with_postfixed_info(module_source, tag_pos, "Embedding a type in a union variant here") + ); + } - // Construct breadcrumbs in case we need to follow some types around - debug_assert!(breadcrumbs.is_empty()); - breadcrumbs.push(Breadcrumb::Linear((module_index, definition_index))); - 'resolve_loop: while !breadcrumbs.is_empty() { - // Retrieve module, the module's root and the definition - let (module, definition_id) = match breadcrumbs.last().unwrap() { - Breadcrumb::Linear((module_index, definition_index)) => { - let module = &modules[*module_index]; - let root = &heap[module.root_id]; - let definition_id = root.definitions[*definition_index]; - (module, definition_id) + // Enumeration is legal + if first_tag_value.is_some() { + // Implement as a tagged union + + // Determine the union variants + let mut tag_value = -1; + let mut variants = 
Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let parser_type = match &variant.value { + EnumVariantValue::None => { + None + }, + EnumVariantValue::Type(parser_type_id) => { + // Type should be resolvable, we checked this above + Some(*parser_type_id) }, - Breadcrumb::Jumping((root_id, definition_id)) => { - let module = &modules[root_id.index as usize]; - debug_assert_eq!(module.root_id, *root_id); - (module, *definition_id) + EnumVariantValue::Integer(_) => { + debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); + unreachable!(); } }; - let definition = &heap[definition_id]; + variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + parser_type, + tag_value, + }) + } - // Because we might have chased around to this particular - // definition before, we check if we haven't resolved the type - // already. - if table.lookup.contains_key(&definition_id) { - breadcrumbs.pop(); - continue; - } + // Ensure union names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - match definition { - Definition::Enum(definition) => { - // Check the definition to see if we're dealing with an - // enum or a union. If we find any union variants then - // we immediately check if the type is already resolved. - assert!(definition.poly_vars.is_empty(), "Polyvars for enum not yet implemented"); - let mut has_tag_values = None; - let mut has_int_values = None; - for variant in &definition.variants { - match &variant.value { - EnumVariantValue::None => {}, - EnumVariantValue::Integer(_) => { - if has_int_values.is_none() { has_int_values = Some(variant.position); } - }, - EnumVariantValue::Type(variant_type) => { - if has_tag_values.is_none() { has_tag_values = Some(variant.position); } - - let variant_type = &heap[*variant_type]; - - let lookup_result = lookup_type_definition( - heap, &table, symbols, module.root_id, - &variant_type.the_type.primitive - ); - if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? 
{ - continue 'resolve_loop; - } - }, - } - } + let mut poly_args = self.create_initial_poly_args(&definition.poly_vars); + for variant in &variants { + if let Some(embedded) = variant.parser_type { + self.check_embedded_type_and_modify_poly_args(ctx, &mut poly_args, root_id, embedded)?; + } + } + let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); - if has_tag_values.is_some() && has_int_values.is_some() { - // Not entirely illegal, but probably not desired - let tag_pos = has_tag_values.unwrap(); - let int_pos = has_int_values.unwrap(); - return Err( - ParseError2::new_error(&module.source, definition.position, "Illegal combination of enum integer variant(s) and enum union variant(s)") - .with_postfixed_info(&module.source, int_pos, "Explicitly assigning an integer value here") - .with_postfixed_info(&module.source, tag_pos, "Explicitly declaring a union variant here") - ) - } + // Insert base definition in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_definition: definition_id, + definition: DefinedTypeVariant::Union(UnionType{ + variants, + tag_representation: Self::enum_tag_type(-1, tag_value), + }), + poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic_types + monomorphs: Vec::new() + }); + } else { + // Implement as a regular enum + let mut enum_value = -1; + let mut min_enum_value = 0; + let mut max_enum_value = 0; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + enum_value += 1; + match &variant.value { + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Type(_) => { + debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); + unreachable!(); + } + } + if enum_value < min_enum_value { min_enum_value = enum_value; } + else if enum_value > max_enum_value { max_enum_value = enum_value; } + } - // If here, then the definition is a valid discriminated - // union with all of its types resolved, or a valid - // enum. - - // Decide whether to implement as enum or as union - let is_union = has_tag_values.is_some(); - if is_union { - // Implement as discriminated union. 
Because we - // checked the availability of types above, we are - // safe to lookup type definitions - let mut tag_value = -1; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - tag_value += 1; - let embedded_type = match &variant.value { - EnumVariantValue::None => { - None - }, - EnumVariantValue::Type(type_annotation_id) => { - // Type should be resolvable, we checked this above - let type_annotation = &heap[*type_annotation_id]; - // TODO: Remove the assert once I'm clear on how to layout "the types" of types - if cfg!(debug_assertions) { - ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); - } - - Some(*type_annotation_id) - }, - EnumVariantValue::Integer(_) => { - debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); - unreachable!(); - } - }; - - variants.push(UnionVariant{ - identifier: variant.identifier.clone(), - embedded_type, - tag_value, - }) - } + // Ensure enum names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - table.add_definition(definition_id, DefinedType::Union(UnionType{ - variants, - tag_representation: enum_representation(tag_value) - })); - } else { - // Implement as regular enum - let mut enum_value = -1; // TODO: allow u64 max size - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - enum_value += 1; - match &variant.value { - EnumVariantValue::None => { - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Integer(override_value) => { - enum_value = *override_value; - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Type(_) => { - debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); - unreachable!(); - } - } - } + // Note: although we cannot have embedded type dependent on the + // polymorphic variables, they might still be present as tokens + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_definition: definition_id, + definition: DefinedTypeVariant::Enum(EnumType{ + variants, + representation: Self::enum_tag_type(min_enum_value, max_enum_value) + }), + poly_args: self.create_initial_poly_args(&definition.poly_vars), + is_polymorph: false, + is_pointerlike: false, + monomorphs: Vec::new() + }); + } - table.add_definition(definition_id, DefinedType::Enum(EnumType{ - variants, - representation: enum_representation(enum_value), - })); - } - }, - Definition::Struct(definition) => { - // Before we start allocating fields, make sure we can - // actually resolve all of the field types - assert!(definition.poly_vars.is_empty(), "Polyvars for struct not yet implemented"); - for field_definition in &definition.fields { - let type_definition = &heap[field_definition.the_type]; - let lookup_result = lookup_type_definition( - heap, &table, symbols, module.root_id, - &type_definition.the_type.primitive - ); - if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? 
{ - continue 'resolve_loop; - } - } + Ok(true) + } - // We can resolve everything - let mut fields = Vec::with_capacity(definition.fields.len()); - for field_definition in &definition.fields { - let type_annotation = &heap[field_definition.the_type]; - if cfg!(debug_assertions) { - ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); - } + /// Resolves the basic struct definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic struct + /// definitions. + fn resolve_base_struct_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &StructDefinition) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base struct already resolved"); - fields.push(StructField{ - identifier: field_definition.field.clone(), - field_type: field_definition.the_type - }); - } + // Make sure all fields point to resolvable types + for field_definition in &definition.fields { + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, field_definition.parser_type)?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } + } - table.add_definition(definition_id, DefinedType::Struct(StructType{ - fields, - })); - }, - Definition::Component(definition) => { - // As always, ensure all parameter types are resolved - assert!(definition.poly_vars.is_empty(), "Polyvars for component not yet implemented"); - for parameter_id in &definition.parameters { - let parameter = &heap[*parameter_id]; - let type_definition = &heap[parameter.type_annotation]; - let lookup_result = lookup_type_definition( - heap, &table, symbols, module.root_id, - &type_definition.the_type.primitive - ); - if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? 
{ - continue 'resolve_loop; - } - } + // All fields types are resolved, construct base type + let mut fields = Vec::with_capacity(definition.fields.len()); + for field_definition in &definition.fields { + fields.push(StructField{ + identifier: field_definition.field.clone(), + parser_type: field_definition.parser_type, + }) + } - // We can resolve everything - let mut parameters = Vec::with_capacity(definition.parameters.len()); - for parameter_id in &definition.parameters { - let parameter = &heap[*parameter_id]; - let type_definition = &heap[parameter.type_annotation]; - if cfg!(debug_assertions) { - ensure_type_definition(heap, &table, symbols, module.root_id, &type_definition.the_type.primitive); - } + // And make sure no conflicts exist in field names and/or polymorphic args + self.check_identifier_collision( + ctx, root_id, &fields, |field| &field.identifier, "struct field" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - parameters.push(FunctionArgument{ - identifier: parameter.identifier.clone(), - argument_type: parameter.type_annotation, - }); - } + // Construct representation of polymorphic arguments + let mut poly_args = self.create_initial_poly_args(&definition.poly_vars); + for field in &fields { + self.check_embedded_type_and_modify_poly_args(ctx, &mut poly_args, root_id, field.parser_type)?; + } - table.add_definition(definition_id, DefinedType::Component(ComponentType{ - variant: definition.variant, - arguments: parameters, // Arguments, parameters, tomayto, tomahto - })); - }, - Definition::Function(definition) => { - assert!(definition.poly_vars.is_empty(), "Polyvars for function not yet implemented"); - // Handle checking the return type - let lookup_result = lookup_type_definition( - heap, &table, symbols, module.root_id, - &heap[definition.return_type].the_type.primitive - ); - if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? { - continue 'resolve_loop; - } + let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); - for parameter_id in &definition.parameters { - let parameter = &heap[*parameter_id]; - let type_definition = &heap[parameter.type_annotation]; - let lookup_result = lookup_type_definition( - heap, &table, symbols, module.root_id, - &type_definition.the_type.primitive - ); - if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? { - continue 'resolve_loop; - } - } + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Struct(StructType{ + fields, + }), + poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); - // Resolve function's types - let mut parameters = Vec::with_capacity(definition.parameters.len()); - for parameter_id in &definition.parameters { - let parameter = &heap[*parameter_id]; - let type_definition = &heap[parameter.type_annotation]; - if cfg!(debug_assertions) { - ensure_type_definition(heap, &table, symbols, module.root_id, &type_definition.the_type.primitive); - } + Ok(true) + } - parameters.push(FunctionArgument{ - identifier: parameter.identifier.clone(), - argument_type: parameter.type_annotation - }); - } + /// Resolves the basic function definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic function + /// definitions. 
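+    /// The stored entry keeps the return-type and argument `ParserTypeId`s
+    /// verbatim (no type inference happens at this stage); `is_polymorph` is
+    /// only set when at least one polymorphic argument is actually referenced
+    /// by the return type or by an argument type.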
+ fn resolve_base_function_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Function) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base function already resolved"); - table.add_definition(definition_id, DefinedType::Function(FunctionType{ - return_type: definition.return_type.clone(), - arguments: parameters, - })); - }, - } + // Check the return type + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, definition.return_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } - // If here, then we layed out the current type definition under - // investigation, so: - debug_assert!(!breadcrumbs.is_empty()); - breadcrumbs.pop(); + // Check the argument types + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, param.parser_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) } + } - // Go to next definition - definition_index += 1; + // Construct arguments to function + let mut arguments = Vec::with_capacity(definition.parameters.len()); + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + arguments.push(FunctionArgument{ + identifier: param.identifier.clone(), + parser_type: param.parser_type, + }) } - debug_assert_eq!( - num_definitions, table.lookup.len(), - "expected {} (reserved) definitions in table, got {}", - num_definitions, table.lookup.len() - ); + // Check conflict of argument and polyarg identifiers + self.check_identifier_collision( + ctx, root_id, &arguments, |arg| &arg.identifier, "function argument" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - Ok(table) - } + // Construct polymorphic arguments + let mut poly_args = self.create_initial_poly_args(&definition.poly_vars); + self.check_embedded_type_and_modify_poly_args(ctx, &mut poly_args, root_id, definition.return_type)?; + for argument in &arguments { + self.check_embedded_type_and_modify_poly_args(ctx, &mut poly_args, root_id, argument.parser_type)?; + } - pub(crate) fn get_definition(&self, definition_id: &DefinitionId) -> Option<&DefinedType> { - self.lookup.get(definition_id) + let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + + // Construct entry in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Function(FunctionType{ + return_type: definition.return_type, + arguments, + }), + poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); + + Ok(true) } - fn add_definition(&mut self, definition_id: DefinitionId, definition: DefinedType) { - debug_assert!(!self.lookup.contains_key(&definition_id), "already added definition"); - self.lookup.insert(definition_id, definition); + /// Resolves the basic component definition to an entry in the type table. + /// It will not instantiate any monomorphized instancees of polymorphic + /// component definitions. 
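+    /// Components differ from functions only in that there is no return type
+    /// to check, so polymorph detection runs over the argument types alone.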
+ fn resolve_base_component_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Component) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base component already resolved"); + + // Check argument types + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, param.parser_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } + } + + // Construct argument types + let mut arguments = Vec::with_capacity(definition.parameters.len()); + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + arguments.push(FunctionArgument{ + identifier: param.identifier.clone(), + parser_type: param.parser_type + }) + } + + // Check conflict of argument and polyarg identifiers + self.check_identifier_collision( + ctx, root_id, &arguments, |arg| &arg.identifier, "component argument" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + // Construct polymorphic arguments + let mut poly_args = self.create_initial_poly_args(&definition.poly_vars); + for argument in &arguments { + self.check_embedded_type_and_modify_poly_args(ctx, &mut poly_args, root_id, argument.parser_type)?; + } + + let is_polymorph = poly_args.iter().any(|v| v.is_in_use); + + // Construct entry in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Component(ComponentType{ + variant: definition.variant, + arguments, + }), + poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); + + Ok(true) } -} -/// Attempts to lookup a type definition using a namespaced identifier. We have -/// three success cases: the first is simply that the type is a `BuiltIn` type. -/// In the two other cases both we find the definition of the type in the symbol -/// table, but in one case it is already `Resolved` in the type table, and in -/// the other case it is `Unresolved`. In this last case the type has to be -/// resolved before we're able to use it in the `TypeTable` construction -/// algorithm. -/// In the `Error` case something goes wrong with resolving the type. The error -/// message aims to be as helpful as possible to the user. -/// The caller should ensure that the `module_root` is where the `identifier` -/// lives. -fn lookup_type_definition( - heap: &Heap, types: &TypeTable, symbols: &SymbolTable, - module_root: RootId, type_to_resolve: &PrimitiveType -) -> LookupResult { - if let PrimitiveType::Symbolic(type_to_resolve) = type_to_resolve { - let identifier = &type_to_resolve.identifier; - - match symbols.resolve_namespaced_symbol(module_root, identifier) { - None => { - // Failed to find anything at all - LookupResult::Error((identifier.position, String::from("Unknown type"))) - }, - Some((symbol, mut identifier_iter)) => { - match symbol.symbol { - Symbol::Namespace(_root_id) => { - // Reference to a namespace, which is not a type. 
However, - // the error message depends on whether we have identifiers - // remaining or not - if identifier_iter.num_remaining() == 0 { - LookupResult::Error(( - identifier.position, - String::from("Expected a type, got a module name") - )) + /// Takes a ResolveResult and returns `true` if the caller can happily + /// continue resolving its current type, or `false` if the caller must break + /// resolving the current type and exit to the outer resolving loop. In the + /// latter case the `result` value was `ResolveResult::Unresolved`, implying + /// that the type must be resolved first. + fn ingest_resolve_result(&mut self, ctx: &TypeCtx, result: ResolveResult) -> Result { + match result { + ResolveResult::BuiltIn | ResolveResult::PolyArg => Ok(true), + ResolveResult::Resolved(_) => Ok(true), + ResolveResult::Unresolved((root_id, definition_id)) => { + if self.iter.contains(root_id, definition_id) { + // Cyclic dependency encountered + // TODO: Allow this + let mut error = ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, ctx.heap[definition_id].position(), + "Evaluating this type definition results in a cyclic type" + ); + + for (breadcrumb_idx, (root_id, definition_id)) in self.iter.breadcrumbs.iter().enumerate() { + let msg = if breadcrumb_idx == 0 { + "The cycle started with this definition" } else { - let next_identifier = identifier_iter.next().unwrap(); - LookupResult::Error(( - identifier.position, - format!("Cannot find symbol '{}' in this module", String::from_utf8_lossy(next_identifier)) - )) + "Which depends on this definition" + }; + + error = error.with_postfixed_info( + &ctx.modules[root_id.index as usize].source, + ctx.heap[*definition_id].position(), msg + ); + } + + Err(error) + } else { + // Type is not yet resolved, so push IDs on iterator and + // continue the resolving loop + self.iter.push(root_id, definition_id); + Ok(false) + } + } + } + } + + /// Each type definition may consist of several embedded subtypes. This + /// function checks whether that embedded type is a builtin, a direct + /// reference to a polymorphic argument, or an (un)resolved type definition. + /// If the embedded type's symbol cannot be found then this function returns + /// an error. + /// + /// If the embedded type is resolved, then one always receives the type's + /// (module, definition) tuple. If any of the types in the embedded type's + /// tree is not yet resolved, then one may receive a (module, definition) + /// tuple that does not correspond to the `parser_type_id` passed into this + /// function. + fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, poly_vars: &Vec, root_id: RootId, parser_type_id: ParserTypeId) -> Result { + use ParserTypeVariant as PTV; + + // Prepping iterator + self.parser_type_iter.clear(); + self.parser_type_iter.push_back(parser_type_id); + + // Result for the very first time we resolve a + let mut resolve_result = None; + let mut set_resolve_result = |v: ResolveResult| { + if resolve_result.is_none() { resolve_result = Some(v); } + }; + + 'resolve_loop: while let Some(parser_type_id) = self.parser_type_iter.pop_back() { + let parser_type = &ctx.heap[parser_type_id]; + + match &parser_type.variant { + // Builtin types. An array is a builtin as it is implemented as a + // couple of pointers, so we do not require the subtype to be fully + // resolved. Similar for input/output ports. 
+ PTV::Array(_) | PTV::Input(_) | PTV::Output(_) | PTV::Message | + PTV::Bool | PTV::Byte | PTV::Short | PTV::Int | PTV::Long | + PTV::String => { + set_resolve_result(ResolveResult::BuiltIn); + }, + // IntegerLiteral types and the inferred marker are not allowed in + // definitions of types + PTV::IntegerLiteral | + PTV::Inferred => { + debug_assert!(false, "Encountered illegal ParserTypeVariant within type definition"); + unreachable!(); + }, + // Symbolic type, make sure its base type, and the base types + // of all members of the embedded type tree are resolved. We + // don't care about monomorphs yet. + PTV::Symbolic(symbolic) => { + // Check if the symbolic type is one of the definition's + // polymorphic arguments. If so then we can halt the + // execution + for poly_arg in poly_vars.iter() { + if poly_arg.value == symbolic.identifier.value { + set_resolve_result(ResolveResult::PolyArg); + continue 'resolve_loop; } - }, - Symbol::Definition((definition_root_id, definition_id)) => { - // Got a definition, but we may also have more identifier's - // remaining in the identifier iterator - let definition = &heap[definition_id]; - if identifier_iter.num_remaining() == 0 { - // See if the type is resolved, and make sure it is - // a non-function, non-component type. Ofcourse - // these are valid types, but we cannot (yet?) use - // them as function arguments, struct fields or enum - // variants - match definition { - Definition::Component(definition) => { - return LookupResult::Error(( - identifier.position, - format!( - "Cannot use the component '{}' as an embedded type", - String::from_utf8_lossy(&definition.identifier.value) - ) - )); - }, - Definition::Function(definition) => { - return LookupResult::Error(( - identifier.position, - format!( - "Cannot use the function '{}' as an embedded type", - String::from_utf8_lossy(&definition.identifier.value) + } + + // Lookup the definition in the symbol table + let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); + if symbol.is_none() { + return Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Could not resolve type" + )) + } + + let (symbol_value, mut ident_iter) = symbol.unwrap(); + match symbol_value.symbol { + Symbol::Namespace(_) => { + // Reference to a namespace instead of a type + return if ident_iter.num_remaining() == 0 { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Expected a type, got a module name" + )) + } else { + let next_identifier = ident_iter.next().unwrap(); + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + &format!("Could not find symbol '{}' with this module", String::from_utf8_lossy(next_identifier)) + )) + } + }, + Symbol::Definition((root_id, definition_id)) => { + let definition = &ctx.heap[definition_id]; + if ident_iter.num_remaining() > 0 { + // Namespaced identifier is longer than the type + // we found. 
Return the appropriate message + return if definition.is_struct() || definition.is_enum() { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + &format!( + "Unknown type '{}', did you mean to use '{}'?", + String::from_utf8_lossy(&symbolic.identifier.value), + String::from_utf8_lossy(&definition.identifier().value) ) - )); - }, - Definition::Enum(_) | Definition::Struct(_) => {} + )) + } else { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Unknown type" + )) + } } - return if types.lookup.contains_key(&definition_id) { - LookupResult::Resolved((definition_root_id, definition_id)) - } else { - LookupResult::Unresolved((definition_root_id, definition_id)) + // Found a match, make sure it is a datatype + if !(definition.is_struct() || definition.is_enum()) { + return Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Embedded types must be datatypes (structs or enums)" + )) } - } else if identifier_iter.num_remaining() == 1 { - // This is always invalid, but if the type is an - // enumeration or a union then we want to return a - // different error message. - if definition.is_enum() { - let last_identifier = identifier_iter.next().unwrap(); - return LookupResult::Error(( - identifier.position, - format!( - "Expected a type, but got a (possible) enum variant '{}'. Only the enum '{}' itself can be used as a type", - String::from_utf8_lossy(last_identifier), - String::from_utf8_lossy(&definition.identifier().value) - ) - )); + + // Found a struct/enum definition + if !self.lookup.contains_key(&definition_id) { + // Type is not yet resoled, immediately return + // this + return Ok(ResolveResult::Unresolved((root_id, definition_id))); + } + + // Type is resolved, so set as result, but continue + // iterating over the parser types in the embedded + // type's tree + set_resolve_result(ResolveResult::Resolved((root_id, definition_id))); + + // Note: because we're resolving parser types, not + // embedded types, we're parsing a tree, so we can't + // get stuck in a cyclic loop. + for poly_arg_type_id in &symbolic.poly_args { + self.parser_type_iter.push_back(*poly_arg_type_id); } } + } + } + } + } - // Too much identifiers (>1) for an enumeration, or not an - // enumeration and we had more identifiers remaining - LookupResult::Error(( - identifier.position, - format!( - "Unknown type '{}', did you mean to use '{}'?", - String::from_utf8_lossy(&identifier.value), - String::from_utf8_lossy(&definition.identifier().value) - ) - )) + // If here then all types in the embedded type's tree were resolved. + debug_assert!(resolve_result.is_some(), "faulty logic in ParserType resolver"); + return Ok(resolve_result.unwrap()) + } + + fn create_initial_poly_args(&self, poly_args: &[Identifier]) -> Vec { + poly_args + .iter() + .map(|v| PolyArg{ identifier: v.clone(), is_in_use: false }) + .collect() + } + + /// This function modifies the passed `poly_args` by checking the embedded + /// type tree. This should be called after `resolve_base_parser_type` is + /// called on each node in this tree: we assume that each symbolic type was + /// resolved to either a polymorphic arg or a definition. + /// + /// This function will also make sure that if the embedded type has + /// polymorphic variables itself, that the number of polymorphic variables + /// matches the number of arguments in the associated definition. 
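+    ///
+    /// Concretely: a symbolic type that names one of the definition's own
+    /// polymorphic variables marks that variable as `is_in_use`; any other
+    /// symbolic type must already be resolved to a struct/enum/union, and the
+    /// number of polymorphic arguments written on it must match its definition.
+    /// For instance, a field written roughly as `Pair<T, byte>` (the surface
+    /// syntax here is only illustrative) requires `Pair` to declare exactly two
+    /// polymorphic variables and marks the enclosing definition's `T` as in use.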
+ fn check_embedded_type_and_modify_poly_args( + &mut self, ctx: &TypeCtx, poly_args: &mut [PolyArg], root_id: RootId, embedded_type_id: ParserTypeId, + ) -> Result<(), ParseError2> { + self.parser_type_iter.clear(); + self.parser_type_iter.push_back(embedded_type_id); + + 'type_loop: while let Some(embedded_type_id) = self.parser_type_iter.pop_back() { + let embedded_type = &ctx.heap[embedded_type_id]; + if let ParserTypeVariant::Symbolic(symbolic) = &embedded_type.variant { + // Check if it matches any of the polymorphic arguments + for (_poly_arg_idx, poly_arg) in poly_args.iter_mut().enumerate() { + if poly_arg.identifier.value == symbolic.identifier.value { + poly_arg.is_in_use = true; + // TODO: Set symbolic value as polyarg in heap + // TODO: If we allow higher-kinded types in the future, + // then we can't continue here, but must resolve the + // polyargs as well + debug_assert!(symbolic.poly_args.is_empty()); + continue 'type_loop; } } + + // Must match a definition + // TODO: Set symbolic value as definition in heap + let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); + debug_assert!(symbol.is_some(), "could not resolve symbolic parser type when determining poly args"); + let (symbol, ident_iter) = symbol.unwrap(); + debug_assert_eq!(ident_iter.num_remaining(), 0, "no exact symbol match when determining poly args"); + let (_root_id, definition_id) = symbol.as_definition().unwrap(); + + // Must be a struct, enum, or union + let defined_type = self.lookup.get(&definition_id).unwrap(); + if cfg!(debug_assertions) { + let type_class = defined_type.definition.type_class(); + debug_assert!( + type_class == TypeClass::Struct || type_class == TypeClass::Enum || type_class == TypeClass::Union, + "embedded type's class is not struct, enum or union" + ); + } + + if symbolic.poly_args.len() != defined_type.poly_args.len() { + // Mismatch in number of polymorphic arguments. This is not + // allowed in type definitions (no inference allowed). 
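+                    // For example, this is reached when a definition embeds a
+                    // type and supplies one polymorphic argument while that
+                    // type's definition declares none (or two): type definitions
+                    // must spell out all polymorphic arguments explicitly.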
+ let module_source = &ctx.modules[root_id.index as usize].source; + let number_args_msg = if defined_type.poly_args.is_empty() { + String::from("is not polymorphic") + } else { + format!("accepts {} polymorphic arguments", defined_type.poly_args.len()) + }; + + return Err(ParseError2::new_error( + module_source, symbolic.identifier.position, + &format!( + "The type '{}' {}, but {} polymorphic arguments were provided", + String::from_utf8_lossy(&symbolic.identifier.value), + number_args_msg, symbolic.poly_args.len() + ) + )); + } + + self.parser_type_iter.extend(&symbolic.poly_args); } } - } else { - LookupResult::BuiltIn + + // All nodes in the embedded type tree were valid + Ok(()) } -} -/// Debugging function to ensure a type is resolved (calling -/// `lookup_type_definition` and ensuring its return value is `BuiltIn` or -/// `Resolved` -#[cfg(debug_assertions)] -fn ensure_type_definition( - heap: &Heap, types: &TypeTable, symbols: &SymbolTable, - module_root: RootId, type_to_resolve: &PrimitiveType -) { - match lookup_type_definition(heap, types, symbols, module_root, type_to_resolve) { - LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, - LookupResult::Unresolved((_, definition_id)) => { - assert!( - false, - "Expected that type definition for {} was resolved by the type table, but it wasn't", - String::from_utf8_lossy(&heap[definition_id].identifier().value) - ) - }, - LookupResult::Error((_, error)) => { - let message = if let PrimitiveType::Symbolic(symbolic) = type_to_resolve { - format!( - "Expected that type definition for {} was resolved by the type table, but it returned: {}", - String::from_utf8_lossy(&symbolic.identifier.value), &error - ) - } else { - format!( - "Expected (non-symbolic!?) type definition to be resolved, but it returned: {}", - &error - ) - }; - assert!(false, "{}", message) + /// Go through a list of identifiers and ensure that all identifiers have + /// unique names + fn check_identifier_collision &Identifier>( + &self, ctx: &TypeCtx, root_id: RootId, items: &[T], getter: F, item_name: &'static str + ) -> Result<(), ParseError2> { + for (item_idx, item) in items.iter().enumerate() { + let item_ident = getter(item); + for other_item in &items[0..item_idx] { + let other_item_ident = getter(other_item); + if item_ident.value == other_item_ident.value { + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, item_ident.position, &format!("This {} is defined more than once", item_name) + ).with_postfixed_info( + module_source, other_item_ident.position, &format!("The other {} is defined here", item_name) + )); + } + } } + + Ok(()) } -} -/// Determines an enumeration's integer representation type, or a union's tag -/// type, using the maximum value of the tag. The returned type is always a -/// builtin type. -/// TODO: Fix for maximum u64 value -fn enum_representation(max_tag_value: i64) -> PrimitiveType { - if max_tag_value <= u8::max_value() as i64 { - PrimitiveType::Byte - } else if max_tag_value <= u16::max_value() as i64 { - PrimitiveType::Short - } else if max_tag_value <= u32::max_value() as i64 { - PrimitiveType::Int - } else { - PrimitiveType::Long + /// Go through a list of polymorphic arguments and make sure that the + /// arguments all have unique names, and the arguments do not conflict with + /// any symbols defined at the module scope. 
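+    /// For example, declaring two polymorphic variables with the same name on
+    /// a single definition is rejected here, as is naming a polymorphic
+    /// variable after a type that is defined in, or imported into, the current
+    /// module.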
+ fn check_poly_args_collision( + &self, ctx: &TypeCtx, root_id: RootId, poly_args: &[Identifier] + ) -> Result<(), ParseError2> { + // Make sure polymorphic arguments are unique and none of the + // identifiers conflict with any imported scopes + for (arg_idx, poly_arg) in poly_args.iter().enumerate() { + for other_poly_arg in &poly_args[..arg_idx] { + if poly_arg.value == other_poly_arg.value { + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, poly_arg.position, + "This polymorphic argument is defined more than once" + ).with_postfixed_info( + module_source, other_poly_arg.position, + "It conflicts with this polymorphic argument" + )); + } + } + + // Check if identifier conflicts with a symbol defined or imported + // in the current module + if let Some(symbol) = ctx.symbols.resolve_symbol(root_id, &poly_arg.value) { + // We have a conflict + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, poly_arg.position, + "This polymorphic argument conflicts with another symbol" + ).with_postfixed_info( + module_source, symbol.position, + "It conflicts due to this symbol" + )); + } + } + + // All arguments are fine + Ok(()) + } + + //-------------------------------------------------------------------------- + // Small utilities + //-------------------------------------------------------------------------- + + fn enum_tag_type(min_tag_value: i64, max_tag_value: i64) -> PrimitiveType { + // TODO: @consistency tag values should be handled correctly + debug_assert!(min_tag_value < max_tag_value); + let abs_max_value = min_tag_value.abs().max(max_tag_value.abs()); + if abs_max_value <= u8::max_value() as i64 { + PrimitiveType::Byte + } else if abs_max_value <= u16::max_value() as i64 { + PrimitiveType::Short + } else if abs_max_value <= u32::max_value() as i64 { + PrimitiveType::Int + } else { + PrimitiveType::Long + } + } + + fn find_root_id(ctx: &TypeCtx, definition_id: DefinitionId) -> RootId { + // TODO: Keep in lookup or something + for module in ctx.modules { + let root_id = module.root_id; + let root = &ctx.heap[root_id]; + for module_definition_id in root.definitions.iter() { + if *module_definition_id == definition_id { + return root_id + } + } + } + + debug_assert!(false, "DefinitionId without corresponding RootId"); + unreachable!(); } } \ No newline at end of file diff --git a/src/protocol/parser/type_table2.rs b/src/protocol/parser/type_table2.rs deleted file mode 100644 index b7becfa5344a0ec3bb90aa05b7d2d01b40e945fa..0000000000000000000000000000000000000000 --- a/src/protocol/parser/type_table2.rs +++ /dev/null @@ -1,964 +0,0 @@ -/** -TypeTable - -Contains the type table: a datastructure that, when compilation succeeds, -contains a concrete type definition for each AST type definition. In general -terms the type table will go through the following phases during the compilation -process: - - 1. The base type definitions are resolved after the parser phase has - finished. This implies that the AST is fully constructed, but not yet - annotated. - 2. With the base type definitions resolved, the validation/linker phase will - use the type table (together with the symbol table) to disambiguate - terms (e.g. does an expression refer to a variable, an enum, a constant, - etc.) - 3. During the type checking/inference phase the type table is used to ensure - that the AST contains valid use of types in expressions and statements. 
- At the same time type inference will find concrete instantiations of - polymorphic types, these will be stored in the type table as monomorphed - instantiations of a generic type. - 4. After type checking and inference (and possibly when constructing byte - code) the type table will construct a type graph and solidify each - non-polymorphic type and monomorphed instantiations of polymorphic types - into concrete types. - -So a base type is defined by its (optionally polymorphic) representation in the -AST. A concrete type replaces all polymorphic arguments with inferred types. A -struct, enum or union may have polymorphic arguments but not actually be a -polymorphic type. This happens when the polymorphic arguments are not used in -the type definition itself. - -NOTE: for now a polymorphic definition of a function/component is illegal if the - polymorphic arguments are not used in the arguments/return type. It should - be legal, but we disallow it for now. - -TODO: Allow potentially cyclic datatypes and reject truly cyclic datatypes. -TODO: Allow for the full potential of polymorphism -TODO: Detect "true" polymorphism: for datatypes like structs/enum/unions this - is simple. For functions we need to check the entire body. Do it here? Or - do it somewhere else? -TODO: Do we want to check fn argument collision here, or in validation phase? -*/ - -use std::fmt::{Formatter, Result as FmtResult}; -use std::collections::HashMap; - -use crate::protocol::ast::*; -use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; -use crate::protocol::inputsource::*; -use crate::protocol::parser::*; -use crate::protocol::parser::type_table2::ResolveResult::Resolved; - -//------------------------------------------------------------------------------ -// Defined Types -//------------------------------------------------------------------------------ - -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum TypeClass { - Enum, - Union, - Struct, - Function, - Component -} - -impl TypeClass { - pub(crate) fn display_name(&self) -> &'static str { - match self { - TypeClass::Enum => "enum", - TypeClass::Union => "enum", - TypeClass::Struct => "struct", - TypeClass::Function => "function", - TypeClass::Component => "component", - } - } -} - -impl std::fmt::Display for TypeClass { - fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { - write!(f, "{}", self.display_name()) - } -} - -/// Struct wrapping around a potentially polymorphic type. If the type does not -/// have any polymorphic arguments then it will not have any monomorphs and -/// `is_polymorph` will be set to `false`. A type with polymorphic arguments -/// only has `is_polymorph` set to `true` if the polymorphic arguments actually -/// appear in the types associated types (function return argument, struct -/// field, enum variant, etc.). Otherwise the polymorphic argument is just a -/// marker and does not influence the bytesize of the type. -pub struct DefinedType { - ast_definition: DefinitionId, - definition: DefinedTypeVariant, - poly_args: Vec, - is_polymorph: bool, - is_pointerlike: bool, - monomorphs: Vec, // TODO: ? 
-} - -pub enum DefinedTypeVariant { - Enum(EnumType), - Union(UnionType), - Struct(StructType), - Function(FunctionType), - Component(ComponentType) -} - -pub struct PolyArg { - identifier: Identifier, - is_phantom: bool, -} - -impl DefinedTypeVariant { - pub(crate) fn type_class(&self) -> TypeClass { - match self { - DefinedTypeVariant::Enum(_) => TypeClass::Enum, - DefinedTypeVariant::Union(_) => TypeClass::Union, - DefinedTypeVariant::Struct(_) => TypeClass::Struct, - DefinedTypeVariant::Function(_) => TypeClass::Function, - DefinedTypeVariant::Component(_) => TypeClass::Component - } - } -} - -/// `EnumType` is the classical C/C++ enum type. It has various variants with -/// an assigned integer value. The integer values may be user-defined, -/// compiler-defined, or a mix of the two. If a user assigns the same enum -/// value multiple times, we assume the user is an expert and we consider both -/// variants to be equal to one another. -pub struct EnumType { - variants: Vec, - representation: PrimitiveType, -} - -// TODO: Also support maximum u64 value -pub struct EnumVariant { - identifier: Identifier, - value: i64, -} - -/// `UnionType` is the algebraic datatype (or sum type, or discriminated union). -/// A value is an element of the union, identified by its tag, and may contain -/// a single subtype. -pub struct UnionType { - variants: Vec, - tag_representation: PrimitiveType -} - -pub struct UnionVariant { - identifier: Identifier, - parser_type: Option, - tag_value: i64, -} - -pub struct StructType { - fields: Vec, -} - -pub struct StructField { - identifier: Identifier, - parser_type: ParserTypeId, -} - -pub struct FunctionType { - return_type: ParserTypeId, - arguments: Vec -} - -pub struct ComponentType { - variant: ComponentVariant, - arguments: Vec -} - -pub struct FunctionArgument { - identifier: Identifier, - parser_type: ParserTypeId, -} - -//------------------------------------------------------------------------------ -// Type table -//------------------------------------------------------------------------------ - -// TODO: @cleanup Do I really need this, doesn't make the code that much cleaner -struct TypeIterator { - breadcrumbs: Vec<(RootId, DefinitionId)> -} - -impl TypeIterator { - fn new() -> Self { - Self{ breadcrumbs: Vec::with_capacity(32) } - } - - fn reset(&mut self, root_id: RootId, definition_id: DefinitionId) { - self.breadcrumbs.clear(); - self.breadcrumbs.push((root_id, definition_id)) - } - - fn push(&mut self, root_id: RootId, definition_id: DefinitionId) { - self.breadcrumbs.push((root_id, definition_id)); - } - - fn contains(&self, root_id: RootId, definition_id: DefinitionId) -> bool { - for (stored_root_id, stored_definition_id) in self.breadcrumbs.iter() { - if stored_root_id == root_id && stored_definition_id == definition_id { return true; } - } - - return false - } - - fn top(&self) -> Option<(RootId, DefinitionId)> { - self.breadcrumbs.last().map(|(r, d)| (*r, *d)) - } - - fn pop(&mut self) { - debug_assert!(!self.breadcrumbs.is_empty()); - self.breadcrumbs.pop(); - } -} - -#[derive(PartialEq, Eq)] -enum SpecifiedTypeVariant { - // No subtypes - Message, - Bool, - Byte, - Short, - Int, - Long, - String, - // Always one subtype - ArrayOf, - InputOf, - OutputOf, - // Variable number of subtypes, depending on the polymorphic arguments on - // the definition - InstanceOf(DefinitionId, usize) -} - -#[derive(Eq)] -struct SpecifiedType { - /// Definition ID, may not be enough as the type may be polymorphic - definition: DefinitionId, - /// The 
polymorphic types for the definition. These are encoded in a list, - /// which we interpret as the depth-first serialization of the type tree. - poly_vars: Vec -} - -impl PartialEq for SpecifiedType { - fn eq(&self, other: &Self) -> bool { - // Should point to same definition and have the same polyvars - if self.definition.index != other.definition.index { return false; } - if self.poly_vars.len() != other.poly_vars.len() { return false; } - for (my_var, other_var) in self.poly_vars.iter().zip(other.poly_vars.iter()) { - if my_var != other_var { return false; } - } - - return true - } -} - -impl SpecifiedType { - fn new_non_polymorph(definition: DefinitionId) -> Self { - Self{ definition, poly_vars: Vec::new() } - } - - fn new_polymorph(definition: DefinitionId, heap: &Heap, parser_type_id: ParserTypeId) -> Self { - // Serialize into concrete types - let mut poly_vars = Vec::new(); - Self::construct_poly_vars(&mut poly_vars, heap, parser_type_id); - Self{ definition, poly_vars } - } - - fn construct_poly_vars(poly_vars: &mut Vec, heap: &Heap, parser_type_id: ParserTypeId) { - // Depth-first construction of poly vars - let parser_type = &heap[parser_type_id]; - match &parser_type.variant { - ParserTypeVariant::Message => { poly_vars.push(SpecifiedTypeVariant::Message); }, - ParserTypeVariant::Bool => { poly_vars.push(SpecifiedTypeVariant::Bool); }, - ParserTypeVariant::Byte => { poly_vars.push(SpecifiedTypeVariant::Byte); }, - ParserTypeVariant::Short => { poly_vars.push(SpecifiedTypeVariant::Short); }, - ParserTypeVariant::Int => { poly_vars.push(SpecifiedTypeVariant::Int); }, - ParserTypeVariant::Long => { poly_vars.push(SpecifiedTypeVariant::Long); }, - ParserTypeVariant::String => { poly_vars.push(SpecifiedTypeVariant::String); }, - ParserTypeVariant::Array(subtype_id) => { - poly_vars.push(SpecifiedTypeVariant::ArrayOf); - Self::construct_poly_vars(poly_vars, heap, *subtype_id); - }, - ParserTypeVariant::Input(subtype_id) => { - poly_vars.push(SpecifiedTypeVariant::InputOf); - Self::construct_poly_vars(poly_vars, heap, *subtype_id); - }, - ParserTypeVariant::Output(subtype_id) => { - poly_vars.push(SpecifiedTypeVariant::OutputOf); - Self::construct_poly_vars(poly_vars, heap, *subtype_id); - }, - ParserTypeVariant::Symbolic(symbolic) => { - let definition_id = match symbolic.variant { - SymbolicParserTypeVariant::Definition(definition_id) => definition_id, - SymbolicParserTypeVariant::PolyArg(_) => { - // When construct entries in the type table, we no longer allow the - // unspecified types in the AST, we expect them to be fully inferred. - debug_assert!(false, "Encountered 'PolyArg' symbolic type. Expected fully inferred types"); - unreachable!(); - } - }; - - poly_vars.push(SpecifiedTypeVariant::InstanceOf(definition_id, symbolic.poly_args.len())); - for subtype_id in &symbolic.poly_args { - Self::construct_poly_vars(poly_vars, heap, *subtype_id); - } - }, - ParserTypeVariant::IntegerLiteral => { - debug_assert!(false, "Encountered 'IntegerLiteral' symbolic type. Expected fully inferred types"); - unreachable!(); - }, - ParserTypeVariant::Inferred => { - debug_assert!(false, "Encountered 'Inferred' symbolic type. Expected fully inferred types"); - unreachable!(); - } - } - } -} - -enum ResolveResult { - BuiltIn, - PolyArg, - Resolved((RootId, DefinitionId)), - Unresolved((RootId, DefinitionId)) -} - -pub(crate) struct TypeTable { - /// Lookup from AST DefinitionId to a defined type. Considering possible - /// polymorphs is done inside the `DefinedType` struct. 
- lookup: HashMap, - /// Iterator over (module, definition) tuples used as workspace to make sure - /// that each base definition of all a type's subtypes are resolved. - iter: TypeIterator, -} - -pub(crate) struct TypeCtx<'a> { - symbols: &'a SymbolTable, - heap: &'a Heap, - modules: &'a [LexedModule] -} - -impl TypeTable { - /// Construct a new type table without any resolved types. Types will be - /// resolved on-demand. - pub(crate) fn new(ctx: &TypeCtx) -> Self { - // Use context to guess hashmap size - let reserve_size = ctx.heap.definitions.len(); - Self{ - lookup: HashMap::with_capacity(reserve_size), - iter: TypeIterator::new(), - } - } - - pub(crate) fn resolve_type(&mut self, ctx: &TypeCtx, specified_type: &SpecifiedType) { - // Make sure we're allowed to cast root_id to index into ctx.modules - if cfg!(debug_assertions) { - for (index, module) in ctx.modules.iter().enumerate() { - debug_assert_eq!(index, module.root_id.index as usize); - } - } - - // Check if we already have the definition - let definition = self.get_definition(definition_id); - - } - - /// This function will resolve just the basic definition of the type, it - /// will not handle any of the monomorphized instances of the type. - fn resolve_base_definition(&mut self, ctx: &TypeCtx, definition_id: DefinitionId) -> &DefinedType { - // Check if we have already resolved the base definition - if let Some(definition) = self.lookup.get(&definition_id) { - return definition; - } - - let root_id = Self::find_root_id(ctx, definition_id); - self.iter.reset(root_id, definition_id); - - while let Some((root_id, definition_id)) = self.iter.top() { - // We have a type to resolve - let definition = &ctx.heap[definition_id]; - - let can_pop_breadcrumb = match definition { - Definition::Enum(definition) => self.resolve_base_enum_definition(ctx, root_id, definition), - Definition::Struct(definition) => self.resolve_base_struct_definition(ctx, root_id, definition), - Definition::Component(definition) => self.resolve_base_component_definition(ctx, root_id, definition), - Definition::Function(definition) => self.resolve_base_function_definition(ctx, root_id, definition), - }?; - - // Otherwise: `ingest_resolve_result` has pushed a new breadcrumb - // that we must follow before we can resolve the current type - if can_pop_breadcrumb { - self.iter.pop(); - } - } - - // We must have resolved the type - debug_assert!(self.lookup.contains_key(&definition_id), "base type not resolved"); - self.lookup.get(&definition_id).unwrap() - } - - /// Resolve the basic enum definition to an entry in the type table. It will - /// not instantiate any monomorphized instances of polymorphic enum - /// definitions. If a subtype has to be resolved first then this function - /// will return `false` after calling `ingest_resolve_result`. - fn resolve_base_enum_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &EnumDefinition) -> Result { - debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base enum already resolved"); - - // Check if the enum should be implemented as a classic enumeration or - // a tagged union. Keep track of variant index for error messages. Make - // sure all embedded types are resolved. 
- let mut first_tag_value = None; - let mut first_int_value = None; - for variant in &definition.variants { - match &variant.value { - EnumVariantValue::None => {}, - EnumVariantValue::Integer(_) => if first_int_value.is_none() { - first_int_value = Some(variant.position); - }, - EnumVariantValue::Type(variant_type_id) => { - if first_tag_value.is_none() { - first_tag_value = Some(variant.position); - } - - // Check if the embedded type needs to be resolved - let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *variant_type_id)?; - if !self.ingest_resolve_result(ctx, resolve_result) { - return Ok(false) - } - } - } - } - - if first_tag_value.is_some() && first_int_value.is_some() { - // Not illegal, but useless and probably a programmer mistake - let module_source = &ctx.modules[root_id.index as usize].source; - let tag_pos = first_tag_value.unwrap(); - let int_pos = first_int_value.unwrap(); - return Err( - ParseError2::new_error( - module_source, definition.position, - "Illegal combination of enum integer variant(s) and enum union variant(s)" - ) - .with_postfixed_info(module_source, int_pos, "Assigning an integer value here") - .with_postfixed_info(module_source, tag_pos, "Embedding a type in a union variant here") - ); - } - - // Enumeration is legal - if first_tag_value.is_some() { - // Implement as a tagged union - - // Determine the union variants - let mut tag_value = -1; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - tag_value += 1; - let parser_type = match &variant.value { - EnumVariantValue::None => { - None - }, - EnumVariantValue::Type(parser_type_id) => { - // Type should be resolvable, we checked this above - Some(*parser_type_id) - }, - EnumVariantValue::Integer(_) => { - debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); - unreachable!(); - } - }; - - variants.push(UnionVariant{ - identifier: variant.identifier.clone(), - parser_type, - tag_value, - }) - } - - // Ensure union names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_args)?; - - // Insert base definition in type table - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType { - ast_definition: definition_id, - definition: DefinedTypeVariant::Union(UnionType{ - variants, - tag_representation: Self::enum_tag_type(-1, tag_value), - }), - poly_args: Vec::new(), - is_polymorph: self.poly_args_in_use.iter().any(|v| *v), - is_pointerlike: false, // TODO: @cyclic_types - monomorphs: Vec::new() - }); - } else { - // Implement as a regular enum - debug_assert!(self.poly_args_in_use.iter().all(|v| !*v)); - let mut enum_value = -1; - let mut min_enum_value = 0; - let mut max_enum_value = 0; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - enum_value += 1; - match &variant.value { - EnumVariantValue::None => { - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Integer(override_value) => { - enum_value = *override_value; - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Type(_) => { - debug_assert!(false, "Encountered `Type` variant after asserting enum is not a 
discriminated union"); - unreachable!(); - } - } - if enum_value < min_enum_value { min_enum_value = enum_value; } - else if enum_value > max_enum_value { max_enum_value = enum_value; } - } - - // Ensure enum names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType { - ast_definition: definition_id, - definition: DefinedTypeVariant::Enum(EnumType{ - variants, - representation: Self::enum_tag_type(min_enum_value, max_enum_value) - }), - poly_args: Vec::new(), - is_polymorph: false, - is_pointerlike: false, - monomorphs: Vec::new() - }); - } - - Ok(true) - } - - /// Resolves the basic struct definition to an entry in the type table. It - /// will not instantiate any monomorphized instances of polymorphic struct - /// definitions. - fn resolve_base_struct_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &StructDefinition) -> Result { - debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base struct already resolved"); - - // Make sure all fields point to resolvable types - for field_definition in &definition.fields { - let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, field_definition.parser_type)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { - return Ok(false) - } - } - - // All fields types are resolved, construct base type - let mut fields = Vec::with_capacity(definition.fields.len()); - for field_definition in &definition.fields { - fields.push(StructField{ - identifier: field_definition.field.clone(), - parser_type: field_definition.parser_type, - }) - } - - // And make sure no conflicts exist in field names and/or polymorphic args - self.check_identifier_collision( - ctx, root_id, &fields, |field| &field.identifier, "struct field" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType{ - ast_definition: definition_id, - definition: DefinedTypeVariant::Struct(StructType{ - fields, - }), - poly_args: Vec::new(), - is_polymorph: false, // TODO: @polymorphism - is_pointerlike: false, // TODO: @cyclic - monomorphs: Vec::new(), - }); - - Ok(true) - } - - /// Resolves the basic function definition to an entry in the type table. It - /// will not instantiate any monomorphized instances of polymorphic function - /// definitions. - fn resolve_base_function_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Function) -> Result { - debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base function already resolved"); - - // Check the return type - let resolve_result = self.resolve_base_parser_type( - ctx, &definition.poly_vars, root_id, definition.return_type - )?; - if !self.ingest_resolve_result(ctx, resolve_result)? { - return Ok(false) - } - - // Check the argument types - for param_id in &definition.parameters { - let param = &ctx.heap[*param_id]; - let resolve_result = self.resolve_base_parser_type( - ctx, &definition.poly_vars, root_id, param.parser_type - )?; - if !self.ingest_resolve_result(ctx, resolve_result)? 
{ - return Ok(false) - } - } - - // Construct arguments to function - let mut arguments = Vec::with_capacity(definition.parameters.len()); - for param_id in &definition.parameters { - let param = &ctx.heap[*param_id]; - arguments.push(FunctionArgument{ - identifier: param.identifier.clone(), - parser_type: param.parser_type, - }) - } - - // Check conflict of argument and polyarg identifiers - self.check_identifier_collision( - ctx, root_id, &arguments, |arg| &arg.identifier, "function argument" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - // Construct entry in type table - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType{ - ast_definition: definition_id, - definition: DefinedTypeVariant::Function(FunctionType{ - return_type: definition.return_type, - arguments, - }), - poly_args: Vec::new(), - is_polymorph: false, // TODO: @polymorphism - is_pointerlike: false, // TODO: @cyclic - monomorphs: Vec::new(), - }); - - Ok(true) - } - - /// Resolves the basic component definition to an entry in the type table. - /// It will not instantiate any monomorphized instancees of polymorphic - /// component definitions. - fn resolve_base_component_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Component) -> Result { - debug_assert!(self.lookup.contains_key(&definition.this.upcast()), "base component already resolved"); - - // Check argument types - for param_id in &definition.parameters { - let param = &ctx.heap[*param_id]; - let resolve_result = self.resolve_base_parser_type( - ctx, &definition.poly_vars, root_id, param.parser_type - )?; - if !self.ingest_resolve_result(ctx, resolve_result)? { - return Ok(false) - } - } - - // Construct argument types - let mut arguments = Vec::with_capacity(definition.parameters.len()); - for param_id in &definition.parameters { - let param = &ctx.heap[*param_id]; - arguments.push(FunctionArgument{ - identifier: param.identifier.clone(), - parser_type: param.parser_type - }) - } - - // Check conflict of argument and polyarg identifiers - self.check_identifier_collision( - ctx, root_id, &arguments, |arg| &arg.identifier, "component argument" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - // Construct entry in type table - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType{ - ast_definition: definition_id, - definition: DefinedTypeVariant::Component(ComponentType{ - variant: definition.variant, - arguments, - }), - poly_args: Vec::new(), - is_polymorph: false, // TODO: @polymorphism, - is_pointerlike: false, // TODO: @cyclic - monomorphs: Vec::new(), - }); - - Ok(true) - } - - /// Takes a ResolveResult and returns `true` if the caller can happily - /// continue resolving its current type, or `false` if the caller must break - /// resolving the current type and exit to the outer resolving loop. In the - /// latter case the `result` value was `ResolveResult::Unresolved`, implying - /// that the type must be resolved first. 
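    /// For example, while resolving a struct whose field embeds a (hypothetical)
    /// type `Foo` that is not yet in the table, `resolve_base_parser_type`
    /// yields `Unresolved((root, foo_id))`; ingesting that result pushes `Foo`
    /// onto the breadcrumb iterator and returns `false`, and the struct is
    /// retried once `Foo` has been resolved. If `Foo` is already on the
    /// breadcrumb stack, the dependency is cyclic and an error is returned
    /// instead.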
- fn ingest_resolve_result(&mut self, ctx: &TypeCtx, result: ResolveResult) -> Result { - match result { - ResolveResult::BuiltIn | ResolveResult::PolyArg => Ok(true), - ResolveResult::Resolved(_) => Ok(true), - ResolveResult::Unresolved((root_id, definition_id)) => { - if self.iter.contains(root_id, definition_id) { - // Cyclic dependency encountered - // TODO: Allow this - let mut error = ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, ctx.heap[definition_id].position(), - "Evaluating this type definition results in a cyclic type" - ); - - for (breadcrumb_idx, (root_id, definition_id)) in self.iter.breadcrumbs.iter().enumerate() { - let msg = if breadcrumb_idx == 0 { - "The cycle started with this definition" - } else { - "Which depends on this definition" - }; - - error = error.with_postfixed_info( - &ctx.modules[root_id.index as usize].source, - ctx.heap[*definition_id].position(), msg - ); - } - - Err(error) - } else { - // Type is not yet resolved, so push IDs on iterator and - // continue the resolving loop - self.iter.push(root_id, definition_id); - Ok(false) - } - } - } - } - - /// Each type definition may consist of several subtypes (the type - /// associated with a union variant, or the type associated with a struct - /// field). This function goes through the subtype tree and determines if - /// each reference type is resolved or not (yet). - fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, poly_vars: &Vec, root_id: RootId, mut parser_type_id: ParserTypeId) -> Result { - use ParserTypeVariant as PTV; - - loop { - let parser_type = &ctx.heap[parser_type_id]; - match &parser_type.variant { - // Builtin types. An array is a builtin as it is implemented as a - // couple of pointers, so we do not require the subtype to be fully - // resolved. - PTV::Array(_) | - PTV::Message | - PTV::Bool | - PTV::Byte | - PTV::Short | - PTV::Int | - PTV::Long | - PTV::String => { - return Ok(ResolveResult::BuiltIn); - }, - // IntegerLiteral types and the inferred marker are not allowed in - // definitions of types - PTV::IntegerLiteral | - PTV::Inferred => { - debug_assert!(false, "Encountered illegal ParserTypeVariant within type definition"); - unreachable!(); - }, - // Input/Output port - // TODO: @unclear Come back to this, do these need to be fully - // resolved? Or do we implement these as the port identifiers? - PTV::Input(subtype_id) | - PTV::Output(subtype_id) => { - parser_type_id = *subtype_id; - }, - PTV::Symbolic(symbolic) => { - // Check if the symbolic type is one of the definition's - // polymorphic arguments. 
If so then we can halt the - // execution - for poly_arg in poly_vars.iter() { - if poly_arg.value == symbolic.identifier.value { - return Ok(ResolveResult::PolyArg) - } - } - - // Lookup the definition in the symbol table - let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); - if symbol.is_none() { - return Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Could not resolve type" - )) - } - - let (symbol_value, mut ident_iter) = symbol.unwrap(); - match symbol_value.symbol { - Symbol::Namespace(_) => { - // Reference to a namespace instead of a type - return if ident_iter.num_remaining() == 0 { - Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Expected a type, got a module name" - )) - } else { - let next_identifier = ident_iter.next().unwrap(); - Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - &format!("Could not find symbol '{}' with this module", String::from_utf8_lossy(next_identifier)) - )) - } - }, - Symbol::Definition((root_id, definition_id)) => { - let definition = &ctx.heap[definition_id]; - if ident_iter.num_remaining > 0 { - // Namespaced identifier is longer than the type - // we found. Return the appropriate message - return if definition.is_struct() || definition.is_enum() { - Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - &format!( - "Unknown type '{}', did you mean to use '{}'?", - String::from_utf8_lossy(&symbolic.identifier.value), - String::from_utf8_lossy(&definition.identifier().value) - ) - )) - } else { - Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Unknown type" - )) - } - } - - // Found a match, make sure it is a datatype - if !(definition.is_struct() || definition.is_enum()) { - return Err(ParseError2::new_error( - &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, - "Embedded types must be datatypes (structs or enums)" - )) - } - - // Found a struct/enum definition - return if self.lookup.contains_key(&definition_id) { - Ok(ResolveResult::Resolved((root_id, definition_id))) - } else { - Ok(ResolveResult::Unresolved((root_id, definition_id))) - } - } - } - } - } - } - } - - /// Go through a list of identifiers and ensure that all identifiers have - /// unique names - fn check_identifier_collision &Identifier>( - &self, ctx: &TypeCtx, root_id: RootId, items: &[T], getter: F, item_name: &'static str - ) -> Result<(), ParseError2> { - for (item_idx, item) in items.iter().enumerate() { - let item_ident = getter(item); - for other_item in &items[0..item_idx] { - let other_item_ident = getter(other_item); - if item_ident.value == other_item_ident { - let module_source = &ctx.modules[root_id.index as usize].source; - return Err(ParseError2::new_error( - module_source, item_ident.position, &format!("This {} is defined more than once", item_name) - ).with_postfixed_info( - module_source, other_item_ident.position, &format!("The other {} is defined here", item_name) - )); - } - } - } - - Ok(()) - } - - /// Go through a list of polymorphic arguments and make sure that the - /// arguments all have unique names, and the arguments do not conflict with - /// any symbols defined at the module scope. 
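    /// For example, a definition that declares the polymorphic argument `T`
    /// twice is rejected, as is a polymorphic argument `T` when a type named
    /// `T` is already defined in, or imported into, the current module.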
- fn check_poly_args_collision( - &self, ctx: &TypeCtx, root_id: RootId, poly_args: &[Identifier] - ) -> Result<(), ParseError2> { - // Make sure polymorphic arguments are unique and none of the - // identifiers conflict with any imported scopes - for (arg_idx, poly_arg) in poly_args.iter().enumerate() { - for other_poly_arg in &poly_args[..arg_idx] { - if poly_arg.value == other_poly_arg.value { - let module_source = &ctx.modules[root_id.index as usize].source; - return Err(ParseError2::new_error( - module_source, poly_arg.position, - "This polymorphic argument is defined more than once" - ).with_postfixed_info( - module_source, other_poly_arg.position, - "It conflicts with this polymorphic argument" - )); - } - } - - // Check if identifier conflicts with a symbol defined or imported - // in the current module - if let Some(symbol) = ctx.symbols.resolve_symbol(root_id, &poly_arg.value) { - // We have a conflict - let module_source = &ctx.modules[root_id.index as usize].source; - return Err(ParseError2::new_error( - module_source, poly_arg.position, - "This polymorphic argument conflicts with another symbol" - ).with_postfixed_info( - module_source, symbol.position, - "It conflicts due to this symbol" - )); - } - } - - // All arguments are fine - Ok(()) - } - - //-------------------------------------------------------------------------- - // Small utilities - //-------------------------------------------------------------------------- - - fn enum_tag_type(min_tag_value: i64, max_tag_value: i64) -> PrimitiveType { - // TODO: @consistency tag values should be handled correctly - debug_assert!(min_tag_value < max_tag_value); - let abs_max_value = min_tag_value.abs().max(max_tag_value.abs()); - if abs_max_value <= u8::max_value() as i64 { - PrimitiveType::Byte - } else if abs_max_value <= u16::max_value() as i64 { - PrimitiveType::Short - } else if abs_max_value <= u32::max_value() as i64 { - PrimitiveType::Int - } else { - PrimitiveType::Long - } - } - - fn find_root_id(ctx: &TypeCtx, definition_id: DefinitionId) -> RootId { - // TODO: Keep in lookup or something - for module in ctx.modules { - let root_id = module.root_id; - let root = &ctx.heap[root_id]; - for module_definition_id in root.definitions.iter() { - if module_definition_id == definition_id { - return root_id - } - } - } - - debug_assert!(false, "DefinitionId without corresponding RootId"); - unreachable!(); - } -} \ No newline at end of file diff --git a/src/protocol/parser/type_table_old.rs b/src/protocol/parser/type_table_old.rs new file mode 100644 index 0000000000000000000000000000000000000000..7785670279273b406be984c3421516d1793d3a5a --- /dev/null +++ b/src/protocol/parser/type_table_old.rs @@ -0,0 +1,697 @@ +// TODO: @fix PrimitiveType for enums/unions +use crate::protocol::ast::*; +use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; +use crate::protocol::inputsource::*; +use crate::protocol::parser::*; + +use std::collections::HashMap; +use crate::common::Formatter; + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum TypeClass { + Enum, + Union, + Struct, + Function, + Component +} + +impl TypeClass { + pub(crate) fn display_name(&self) -> &'static str { + match self { + TypeClass::Enum => "enum", + TypeClass::Union => "enum", + TypeClass::Struct => "struct", + TypeClass::Function => "function", + TypeClass::Component => "component", + } + } +} + +impl std::fmt::Display for TypeClass { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.display_name()) + } +} + +pub enum 
DefinedType { + Enum(EnumType), + Union(UnionType), + Struct(StructType), + Function(FunctionType), + Component(ComponentType) +} + +impl DefinedType { + pub(crate) fn type_class(&self) -> TypeClass { + match self { + DefinedType::Enum(_) => TypeClass::Enum, + DefinedType::Union(_) => TypeClass::Union, + DefinedType::Struct(_) => TypeClass::Struct, + DefinedType::Function(_) => TypeClass::Function, + DefinedType::Component(_) => TypeClass::Component + } + } +} + +// TODO: Also support maximum u64 value +pub struct EnumVariant { + identifier: Identifier, + value: i64, +} + +/// `EnumType` is the classical C/C++ enum type. It has various variants with +/// an assigned integer value. The integer values may be user-defined, +/// compiler-defined, or a mix of the two. If a user assigns the same enum +/// value multiple times, we assume the user is an expert and we consider both +/// variants to be equal to one another. +pub struct EnumType { + variants: Vec, + representation: PrimitiveType, +} + +pub struct UnionVariant { + identifier: Identifier, + embedded_type: Option, + tag_value: i64, +} + +/// `UnionType` is the algebraic datatype (or sum type, or discriminated union). +/// A value is an element of the union, identified by its tag, and may contain +/// a single subtype. +pub struct UnionType { + variants: Vec, + tag_representation: PrimitiveType +} + +pub struct StructField { + identifier: Identifier, + field_type: TypeAnnotationId, +} + +pub struct StructType { + fields: Vec, +} + +pub struct FunctionArgument { + identifier: Identifier, + argument_type: TypeAnnotationId, +} + +pub struct FunctionType { + return_type: TypeAnnotationId, + arguments: Vec +} + +pub struct ComponentType { + variant: ComponentVariant, + arguments: Vec +} + +pub struct TypeTable { + lookup: HashMap, +} + +enum LookupResult { + BuiltIn, + Resolved((RootId, DefinitionId)), + Unresolved((RootId, DefinitionId)), + Error((InputPosition, String)), +} + +/// `TypeTable` is responsible for walking the entire lexed AST and laying out +/// the various user-defined types in terms of bytes and offsets. This process +/// may be pseudo-recursive (meaning: it is implemented in a recursive fashion, +/// but not in the recursive-function-call kind of way) in case a type depends +/// on another type to be resolved. +/// TODO: Distinction between resolved types and unresolved types (regarding the +/// mixed use of BuiltIns and resolved types) is a bit yucky at the moment, +/// will have to come up with something nice in the future. +/// TODO: Need to factor out the repeated lookup in some kind of state machine. +impl TypeTable { + pub(crate) fn new( + symbols: &SymbolTable, heap: &Heap, modules: &[LexedModule] + ) -> Result { + if cfg!(debug_assertions) { + for (index, module) in modules.iter().enumerate() { + debug_assert_eq!(index, module.root_id.index as usize) + } + } + + // Estimate total number of definitions we will encounter + let num_definitions = heap.definitions.len(); + let mut table = TypeTable{ + lookup: HashMap::with_capacity(num_definitions), + }; + + // Perform the breadcrumb-based type parsing. For now we do not allow + // cyclic types. + // TODO: Allow cyclic types. However, we want to have an implementation + // that is somewhat efficient: if a type is cyclic, then we have to + // insert a pointer somewhere. However, we don't want to insert them + // everywhere, for each possible type. 
Without any further context the + // decision to place a pointer somewhere is "random", we need to know + // how the type is used to have an informed opinion on where to place + // the pointer. + enum Breadcrumb { + Linear((usize, usize)), + Jumping((RootId, DefinitionId)) + } + + // Helper to handle the return value from lookup_type_definition. If + // resolved/builtin we return `true`, if an error we complete the error + // and return it. If unresolved then we check for cyclic types and + // return an error if cyclic, otherwise return false (indicating we need + // to continue in the breadcrumb-based resolving loop) + let handle_lookup_result = | + heap: &Heap, all_modules: &[LexedModule], cur_module: &LexedModule, + breadcrumbs: &mut Vec, result: LookupResult + | -> Result { + return match result { + LookupResult::BuiltIn | LookupResult::Resolved(_) => Ok(true), + LookupResult::Unresolved((new_root_id, new_definition_id)) => { + // Check for cyclic dependencies + let mut is_cyclic = false; + for breadcrumb in breadcrumbs.iter() { + let (root_id, definition_id) = match breadcrumb { + Breadcrumb::Linear((root_index, definition_index)) => { + let root_id = all_modules[*root_index].root_id; + let definition_id = heap[root_id].definitions[*definition_index]; + (root_id, definition_id) + }, + Breadcrumb::Jumping(root_id_and_definition_id) => *root_id_and_definition_id + }; + + if root_id == new_root_id && definition_id == new_definition_id { + // Oh noes! + is_cyclic = true; + break; + } + } + + if is_cyclic { + let mut error = ParseError2::new_error( + &all_modules[new_root_id.index as usize].source, + heap[new_definition_id].position(), + "Evaluating this definition results in a a cyclic dependency" + ); + for (index, breadcrumb) in breadcrumbs.iter().enumerate() { + match breadcrumb { + Breadcrumb::Linear((root_index, definition_index)) => { + debug_assert_eq!(index, 0); + error = error.with_postfixed_info( + &all_modules[*root_index].source, + heap[heap[all_modules[*root_index].root_id].definitions[*definition_index]].position(), + "The cycle started with this definition" + ) + }, + Breadcrumb::Jumping((root_id, definition_id)) => { + debug_assert!(index > 0); + error = error.with_postfixed_info( + &all_modules[root_id.index as usize].source, + heap[*definition_id].position(), + "Which depends on this definition" + ) + } + } + } + Err(error) + } else { + breadcrumbs.push(Breadcrumb::Jumping((new_root_id, new_definition_id))); + Ok(false) + } + }, + LookupResult::Error((position, message)) => { + Err(ParseError2::new_error(&cur_module.source, position, &message)) + } + } + }; + + let mut module_index = 0; + let mut definition_index = 0; + let mut breadcrumbs = Vec::with_capacity(32); // if a user exceeds this, the user sucks at programming + while module_index < modules.len() { + // Go to next module if needed + { + let root = &heap[modules[module_index].root_id]; + if definition_index >= root.definitions.len() { + module_index += 1; + definition_index = 0; + continue; + } + } + + // Construct breadcrumbs in case we need to follow some types around + debug_assert!(breadcrumbs.is_empty()); + breadcrumbs.push(Breadcrumb::Linear((module_index, definition_index))); + 'resolve_loop: while !breadcrumbs.is_empty() { + // Retrieve module, the module's root and the definition + let (module, definition_id) = match breadcrumbs.last().unwrap() { + Breadcrumb::Linear((module_index, definition_index)) => { + let module = &modules[*module_index]; + let root = &heap[module.root_id]; + let definition_id = 
root.definitions[*definition_index]; + (module, definition_id) + }, + Breadcrumb::Jumping((root_id, definition_id)) => { + let module = &modules[root_id.index as usize]; + debug_assert_eq!(module.root_id, *root_id); + (module, *definition_id) + } + }; + + let definition = &heap[definition_id]; + + // Because we might have chased around to this particular + // definition before, we check if we haven't resolved the type + // already. + if table.lookup.contains_key(&definition_id) { + breadcrumbs.pop(); + continue; + } + + match definition { + Definition::Enum(definition) => { + // Check the definition to see if we're dealing with an + // enum or a union. If we find any union variants then + // we immediately check if the type is already resolved. + assert!(definition.poly_vars.is_empty(), "Polyvars for enum not yet implemented"); + let mut has_tag_values = None; + let mut has_int_values = None; + for variant in &definition.variants { + match &variant.value { + EnumVariantValue::None => {}, + EnumVariantValue::Integer(_) => { + if has_int_values.is_none() { has_int_values = Some(variant.position); } + }, + EnumVariantValue::Type(variant_type) => { + if has_tag_values.is_none() { has_tag_values = Some(variant.position); } + + let variant_type = &heap[*variant_type]; + + let lookup_result = lookup_type_definition( + heap, &table, symbols, module.root_id, + &variant_type.the_type.primitive + ); + if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? { + continue 'resolve_loop; + } + }, + } + } + + if has_tag_values.is_some() && has_int_values.is_some() { + // Not entirely illegal, but probably not desired + let tag_pos = has_tag_values.unwrap(); + let int_pos = has_int_values.unwrap(); + return Err( + ParseError2::new_error(&module.source, definition.position, "Illegal combination of enum integer variant(s) and enum union variant(s)") + .with_postfixed_info(&module.source, int_pos, "Explicitly assigning an integer value here") + .with_postfixed_info(&module.source, tag_pos, "Explicitly declaring a union variant here") + ) + } + + // If here, then the definition is a valid discriminated + // union with all of its types resolved, or a valid + // enum. + + // Decide whether to implement as enum or as union + let is_union = has_tag_values.is_some(); + if is_union { + // Implement as discriminated union. 
Because we + // checked the availability of types above, we are + // safe to lookup type definitions + let mut tag_value = -1; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let embedded_type = match &variant.value { + EnumVariantValue::None => { + None + }, + EnumVariantValue::Type(type_annotation_id) => { + // Type should be resolvable, we checked this above + let type_annotation = &heap[*type_annotation_id]; + // TODO: Remove the assert once I'm clear on how to layout "the types" of types + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); + } + + Some(*type_annotation_id) + }, + EnumVariantValue::Integer(_) => { + debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); + unreachable!(); + } + }; + + variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + embedded_type, + tag_value, + }) + } + + table.add_definition(definition_id, DefinedType::Union(UnionType{ + variants, + tag_representation: enum_representation(tag_value) + })); + } else { + // Implement as regular enum + let mut enum_value = -1; // TODO: allow u64 max size + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + enum_value += 1; + match &variant.value { + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Type(_) => { + debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); + unreachable!(); + } + } + } + + table.add_definition(definition_id, DefinedType::Enum(EnumType{ + variants, + representation: enum_representation(enum_value), + })); + } + }, + Definition::Struct(definition) => { + // Before we start allocating fields, make sure we can + // actually resolve all of the field types + assert!(definition.poly_vars.is_empty(), "Polyvars for struct not yet implemented"); + for field_definition in &definition.fields { + let type_definition = &heap[field_definition.the_type]; + let lookup_result = lookup_type_definition( + heap, &table, symbols, module.root_id, + &type_definition.the_type.primitive + ); + if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? 
{ + continue 'resolve_loop; + } + } + + // We can resolve everything + let mut fields = Vec::with_capacity(definition.fields.len()); + for field_definition in &definition.fields { + let type_annotation = &heap[field_definition.the_type]; + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_annotation.the_type.primitive); + } + + fields.push(StructField{ + identifier: field_definition.field.clone(), + field_type: field_definition.the_type + }); + } + + table.add_definition(definition_id, DefinedType::Struct(StructType{ + fields, + })); + }, + Definition::Component(definition) => { + // As always, ensure all parameter types are resolved + assert!(definition.poly_vars.is_empty(), "Polyvars for component not yet implemented"); + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + let lookup_result = lookup_type_definition( + heap, &table, symbols, module.root_id, + &type_definition.the_type.primitive + ); + if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? { + continue 'resolve_loop; + } + } + + // We can resolve everything + let mut parameters = Vec::with_capacity(definition.parameters.len()); + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_definition.the_type.primitive); + } + + parameters.push(FunctionArgument{ + identifier: parameter.identifier.clone(), + argument_type: parameter.type_annotation, + }); + } + + table.add_definition(definition_id, DefinedType::Component(ComponentType{ + variant: definition.variant, + arguments: parameters, // Arguments, parameters, tomayto, tomahto + })); + }, + Definition::Function(definition) => { + assert!(definition.poly_vars.is_empty(), "Polyvars for function not yet implemented"); + // Handle checking the return type + let lookup_result = lookup_type_definition( + heap, &table, symbols, module.root_id, + &heap[definition.return_type].the_type.primitive + ); + if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? { + continue 'resolve_loop; + } + + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + let lookup_result = lookup_type_definition( + heap, &table, symbols, module.root_id, + &type_definition.the_type.primitive + ); + if !handle_lookup_result(heap, modules, module, &mut breadcrumbs, lookup_result)? 
{ + continue 'resolve_loop; + } + } + + // Resolve function's types + let mut parameters = Vec::with_capacity(definition.parameters.len()); + for parameter_id in &definition.parameters { + let parameter = &heap[*parameter_id]; + let type_definition = &heap[parameter.type_annotation]; + if cfg!(debug_assertions) { + ensure_type_definition(heap, &table, symbols, module.root_id, &type_definition.the_type.primitive); + } + + parameters.push(FunctionArgument{ + identifier: parameter.identifier.clone(), + argument_type: parameter.type_annotation + }); + } + + table.add_definition(definition_id, DefinedType::Function(FunctionType{ + return_type: definition.return_type.clone(), + arguments: parameters, + })); + }, + } + + // If here, then we layed out the current type definition under + // investigation, so: + debug_assert!(!breadcrumbs.is_empty()); + breadcrumbs.pop(); + } + + // Go to next definition + definition_index += 1; + } + + debug_assert_eq!( + num_definitions, table.lookup.len(), + "expected {} (reserved) definitions in table, got {}", + num_definitions, table.lookup.len() + ); + + Ok(table) + } + + pub(crate) fn get_definition(&self, definition_id: &DefinitionId) -> Option<&DefinedType> { + self.lookup.get(definition_id) + } + + fn add_definition(&mut self, definition_id: DefinitionId, definition: DefinedType) { + debug_assert!(!self.lookup.contains_key(&definition_id), "already added definition"); + self.lookup.insert(definition_id, definition); + } +} + +/// Attempts to lookup a type definition using a namespaced identifier. We have +/// three success cases: the first is simply that the type is a `BuiltIn` type. +/// In the two other cases both we find the definition of the type in the symbol +/// table, but in one case it is already `Resolved` in the type table, and in +/// the other case it is `Unresolved`. In this last case the type has to be +/// resolved before we're able to use it in the `TypeTable` construction +/// algorithm. +/// In the `Error` case something goes wrong with resolving the type. The error +/// message aims to be as helpful as possible to the user. +/// The caller should ensure that the `module_root` is where the `identifier` +/// lives. +fn lookup_type_definition( + heap: &Heap, types: &TypeTable, symbols: &SymbolTable, + module_root: RootId, type_to_resolve: &PrimitiveType +) -> LookupResult { + if let PrimitiveType::Symbolic(type_to_resolve) = type_to_resolve { + let identifier = &type_to_resolve.identifier; + + match symbols.resolve_namespaced_symbol(module_root, identifier) { + None => { + // Failed to find anything at all + LookupResult::Error((identifier.position, String::from("Unknown type"))) + }, + Some((symbol, mut identifier_iter)) => { + match symbol.symbol { + Symbol::Namespace(_root_id) => { + // Reference to a namespace, which is not a type. 
However, + // the error message depends on whether we have identifiers + // remaining or not + if identifier_iter.num_remaining() == 0 { + LookupResult::Error(( + identifier.position, + String::from("Expected a type, got a module name") + )) + } else { + let next_identifier = identifier_iter.next().unwrap(); + LookupResult::Error(( + identifier.position, + format!("Cannot find symbol '{}' in this module", String::from_utf8_lossy(next_identifier)) + )) + } + }, + Symbol::Definition((definition_root_id, definition_id)) => { + // Got a definition, but we may also have more identifier's + // remaining in the identifier iterator + let definition = &heap[definition_id]; + if identifier_iter.num_remaining() == 0 { + // See if the type is resolved, and make sure it is + // a non-function, non-component type. Ofcourse + // these are valid types, but we cannot (yet?) use + // them as function arguments, struct fields or enum + // variants + match definition { + Definition::Component(definition) => { + return LookupResult::Error(( + identifier.position, + format!( + "Cannot use the component '{}' as an embedded type", + String::from_utf8_lossy(&definition.identifier.value) + ) + )); + }, + Definition::Function(definition) => { + return LookupResult::Error(( + identifier.position, + format!( + "Cannot use the function '{}' as an embedded type", + String::from_utf8_lossy(&definition.identifier.value) + ) + )); + }, + Definition::Enum(_) | Definition::Struct(_) => {} + } + + return if types.lookup.contains_key(&definition_id) { + LookupResult::Resolved((definition_root_id, definition_id)) + } else { + LookupResult::Unresolved((definition_root_id, definition_id)) + } + } else if identifier_iter.num_remaining() == 1 { + // This is always invalid, but if the type is an + // enumeration or a union then we want to return a + // different error message. + if definition.is_enum() { + let last_identifier = identifier_iter.next().unwrap(); + return LookupResult::Error(( + identifier.position, + format!( + "Expected a type, but got a (possible) enum variant '{}'. 
Only the enum '{}' itself can be used as a type", + String::from_utf8_lossy(last_identifier), + String::from_utf8_lossy(&definition.identifier().value) + ) + )); + } + } + + // Too much identifiers (>1) for an enumeration, or not an + // enumeration and we had more identifiers remaining + LookupResult::Error(( + identifier.position, + format!( + "Unknown type '{}', did you mean to use '{}'?", + String::from_utf8_lossy(&identifier.value), + String::from_utf8_lossy(&definition.identifier().value) + ) + )) + } + } + } + } + } else { + LookupResult::BuiltIn + } +} + +/// Debugging function to ensure a type is resolved (calling +/// `lookup_type_definition` and ensuring its return value is `BuiltIn` or +/// `Resolved` +#[cfg(debug_assertions)] +fn ensure_type_definition( + heap: &Heap, types: &TypeTable, symbols: &SymbolTable, + module_root: RootId, type_to_resolve: &PrimitiveType +) { + match lookup_type_definition(heap, types, symbols, module_root, type_to_resolve) { + LookupResult::BuiltIn | LookupResult::Resolved(_) => {}, + LookupResult::Unresolved((_, definition_id)) => { + assert!( + false, + "Expected that type definition for {} was resolved by the type table, but it wasn't", + String::from_utf8_lossy(&heap[definition_id].identifier().value) + ) + }, + LookupResult::Error((_, error)) => { + let message = if let PrimitiveType::Symbolic(symbolic) = type_to_resolve { + format!( + "Expected that type definition for {} was resolved by the type table, but it returned: {}", + String::from_utf8_lossy(&symbolic.identifier.value), &error + ) + } else { + format!( + "Expected (non-symbolic!?) type definition to be resolved, but it returned: {}", + &error + ) + }; + assert!(false, "{}", message) + } + } +} + +/// Determines an enumeration's integer representation type, or a union's tag +/// type, using the maximum value of the tag. The returned type is always a +/// builtin type. +/// TODO: Fix for maximum u64 value +fn enum_representation(max_tag_value: i64) -> PrimitiveType { + if max_tag_value <= u8::max_value() as i64 { + PrimitiveType::Byte + } else if max_tag_value <= u16::max_value() as i64 { + PrimitiveType::Short + } else if max_tag_value <= u32::max_value() as i64 { + PrimitiveType::Int + } else { + PrimitiveType::Long + } +} \ No newline at end of file diff --git a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs index 3831aad78e31d5b2fcdbc31d8c7d9225ced3e7e1..a3e460dfe95b2352b25dda07250c9618f1471ee5 100644 --- a/src/protocol/parser/visitor_linker.rs +++ b/src/protocol/parser/visitor_linker.rs @@ -698,7 +698,6 @@ impl ValidityAndLinkerVisitor { // the traversal of the block's statements. 
let body = &mut ctx.heap[id]; body.parent_scope = self.cur_scope.clone(); - println!("DEBUG: Assigning relative {} to block {}", self.relative_pos_in_block, id.0.index); body.relative_pos_in_parent = self.relative_pos_in_block; let old_scope = self.cur_scope.replace(match hint { @@ -980,9 +979,9 @@ impl ValidityAndLinkerVisitor { match &symbol.symbol { Symbol::Definition((_, definition_id)) => { // Make sure definition is of the expected type - let definition_type = types.get_definition(definition_id); + let definition_type = types.get_base_definition(&definition_id); debug_assert!(definition_type.is_some(), "Found symbol '{}' in symbol table, but not in type table", String::from_utf8_lossy(&identifier.value)); - let definition_type_class = definition_type.unwrap().type_class(); + let definition_type_class = definition_type.unwrap().definition.type_class(); if definition_type_class != expected_type_class { FindOfTypeResult::TypeMismatch(definition_type_class.display_name())
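Both type-table implementations above drive resolution with an explicit breadcrumb stack rather than recursion. The following minimal standalone sketch illustrates that control flow under simplified assumptions (all names here are hypothetical: dependencies are modelled as a plain string map instead of the AST heap, and errors as `String` instead of `ParseError2`):

use std::collections::HashMap;

// Resolve `start` and everything it depends on, depth-first, using an explicit
// breadcrumb stack instead of recursion. Dependencies are modelled as a plain
// map from name to embedded names (every name is assumed to have an entry);
// the return value is the order in which names were resolved.
fn resolve(
    deps: &HashMap<&'static str, Vec<&'static str>>,
    start: &'static str,
) -> Result<Vec<&'static str>, String> {
    let mut resolved: Vec<&'static str> = Vec::new();
    let mut breadcrumbs = vec![start];

    while !breadcrumbs.is_empty() {
        let current = *breadcrumbs.last().unwrap();
        if resolved.contains(&current) {
            // Already handled while chasing another definition's dependencies.
            breadcrumbs.pop();
            continue;
        }

        // Find the first embedded type that has not been resolved yet.
        let missing = deps[current].iter().find(|d| !resolved.contains(*d));
        match missing {
            Some(&dep) if breadcrumbs.contains(&dep) => {
                // The dependency is already on the stack: a cyclic type.
                return Err(format!("cyclic dependency: {} -> {}", current, dep));
            }
            Some(&dep) => {
                // Equivalent to ingesting an `Unresolved` result: push a new
                // breadcrumb and retry the current definition afterwards.
                breadcrumbs.push(dep);
            }
            None => {
                // All embedded types resolved: insert into the "table".
                resolved.push(current);
                breadcrumbs.pop();
            }
        }
    }

    Ok(resolved)
}

fn main() {
    // A embeds B, B embeds C, and C embeds nothing.
    let mut deps = HashMap::new();
    deps.insert("A", vec!["B"]);
    deps.insert("B", vec!["C"]);
    deps.insert("C", vec![]);
    assert_eq!(resolve(&deps, "A").unwrap(), vec!["C", "B", "A"]);

    // Closing the loop turns it into a cycle, which is reported as an error.
    deps.insert("C", vec!["A"]);
    assert!(resolve(&deps, "A").is_err());
}

The real implementations additionally key breadcrumbs by `(RootId, DefinitionId)` so that resolution can hop between modules, and they replay the breadcrumb chain into the `ParseError2` so the user sees every definition participating in the cycle.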