From ef3eec73f35900af48622f4a07ea31430ee713b4 2021-03-10 15:49:03 From: MH Date: 2021-03-10 15:49:03 Subject: [PATCH] WIP on reimplementation of type table supporting polymorphic types --- diff --git a/notes_max.md b/notes_max.md index 34bda06763a5e4a744378997eaa7a6b777e76a8f..fcfb3e8b6d9fe970f0a2548a7741642c64bbf4e9 100644 --- a/notes_max.md +++ b/notes_max.md @@ -1,3 +1,98 @@ # Noteworthy changes -- `if`, `while`, `synchronous`, `function`, `primitive` and `composite` body statements, if not yet a block, are all converted into a block for simpler parsing/compiling. \ No newline at end of file +- `if`, `while`, `synchronous`, `function`, `primitive` and `composite` body statements, if not yet a block, are all converted into a block for simpler parsing/compiling. + +# Thinkings: Type System + +## Basic Types + +Builtin primitive types: + +- Boolean: `bool` +- Unsigned integers: `u8`, `u16`, `u32`, `u64` +- Signed integers: `i8`, `i16`, `i32`, `i64` +- Decimal numbers: `f32`, `f64` +- Message-based: `in`, `out`, `msg` + +Builtin compound types: + +- Array: `array` + +User-specified types (excluding polymorphism for now): + +- Connectors: `primitive`, `composite` +- Functions: `func` +- Enums: `enum` +- Unions: `enum` +- Structs: `struct` + +## Polymorphic Type Definitions + +Various types may be polymorphic. We will exclude builtin polymorphs from the following considerations (i.e. `in`, `out` and `array`, and their associated functions). + +Connectors, functions, enums, unions and structs may all be polymorphic. We'll use the C++/rust/Java syntax for specifying polymorphic types: + +```pdl +primitive fifo(in i, out o) { + // impl +} + +T pick_one_of_two(T one, T two) { + // impl +} + +enum Option{ None, Some(T) } +enum Result{ Ok(T), Err(E) } +struct Pair { T first, T second } +``` + +This means that during the initial validation/linker phase all of the types may have polymorphic arguments. 
These polyargs have just an identifier, we can only determine the concrete type when we encounter it in another body where we either infer the type or where the user has explicitly instantiated the type. + +For now we will use the C++-like trait/interfaceless polymorphism: once we know all of the polymorphic arguments we will try to monomorphize the type and check whether or not all calls make sense. This in itself is a recursive operation: inside the polymorph we may use other polymorphic types. + +## Polymorphic Type Usage + +Within functions and connectors we may employ polymorphic types. The specification of the polymorphic arguments is not required if they can be inferred. Polymorphic arguments may be partially specified by using some kind of throwaway `_` or `auto` type. When we are monomorphizing a type we must be able to fully determine all of the polymorphic arguments, if we can't then we throw a compiler error. + +## Conclusions + +- + Type definitions may contain polymorphic arguments. These are simple identifiers. The polymorphic arguments are embedded in definitions, they are erased after type inference on the type's use (e.g. function call or struct instantiation) has finished. + +- + While instantiating polymorphic types (e.g. `T something = call_a_func(3, 5)` or the inferred variant `let something = call_a_func(3, 5)`) we actually need to resolve these types. This implies: + + - Memory declarations will have their type embedded within the memory declaration itself. If we do not know the type yet, or only partially know the type, then we need to mark the type as being inferred. + - Expressions will have their type inferred from the constraints the expression imposes on its arguments, and the types of the arguments themselves. Within an expression tree type information may flow from the root towards the leaves or from the leaves towards the root. + + This implies that type information in the AST must be fully specified. 
If the type information is not yet fully known then it must be embedded in some kind of datastructure that is used for type inference. + +- + A function definition with polyargs seems to behave a bit differently: During the initial definition parsing we may want to make sure that each type in the definition either resolves to a concrete type (optionally with polymorphic arguments) or to one of the polyargs. This would also provide information to the type inference algorithm. + +## Conclusions - Parsing + +During parsing we want to embed all of the information the programmer has provided to the compiler in the AST. So polyargs go inside type definitions, types that are supposed to be inferred are embedded in the AST, types that depend on polyargs also go into the AST. So we may have: + +- Partially concrete types with embedded inferred types (e.g. `let[] list_of_things` is an `Array` or `Result thing = Result::Ok(5)` or `let thing = Result::Ok(5)`). +- Direct references to polyarg types (e.g. `T list_of_things` or `Result thing = ...` or `T[][] array_of_arrays_yes_sir` and `Tuple2 = construct_a_thing()`) +- Fully inferred types (e.g. `auto thing = 5` or `auto result = call_a_thing()`) +- Completely specified types. + +## Conclusions - Type Inference + +During type inference and typechecking we need to determine all concrete types. So we will arrive at a fully specified type: every polymorphic type will have its polyargs specified. Likewise every inferred type will be fully specified as well. + +All of this hints at having to specify two classes of types: `ParserType` and a `Type`. 
The `ParserType` can be: + +- A builtin type (with embedded `ParserType`s where appropriate) +- An inferred type +- A polyarg type +- A symbolic type (with embedded `ParserType`s where appropriate) + +While the final `Type` can be: + +- A builtin type (with embedded types where appropriate) +- A known user-defined type (with embedded types where appropriate) + +Note that we cannot typecheck polymorphic connectors and functions until we monomorphize them. \ No newline at end of file diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index beb2f578d53544d1fa9d9847cdc9efae0ef510b5..d81f79855f2d874722ac8ac8596ef90cbb493b89 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -34,7 +34,7 @@ macro_rules! define_new_ast_id { define_aliased_ast_id!(RootId, Id); define_aliased_ast_id!(PragmaId, Id); define_aliased_ast_id!(ImportId, Id); -define_aliased_ast_id!(TypeAnnotationId, Id); +define_aliased_ast_id!(ParserTypeId, Id); define_aliased_ast_id!(VariableId, Id); define_new_ast_id!(ParameterId, VariableId); @@ -84,8 +84,6 @@ define_new_ast_id!(VariableExpressionId, ExpressionId); // TODO: @cleanup - pub qualifiers can be removed once done #[derive(Debug, serde::Serialize, serde::Deserialize)] pub struct Heap { - // Allocators - // #[serde(skip)] string_alloc: StringAllocator, // Root arena, contains the entry point for different modules. Each root // contains lists of IDs that correspond to the other arenas. 
pub(crate) protocol_descriptions: Arena, @@ -93,7 +91,7 @@ pub struct Heap { pragmas: Arena, pub(crate) imports: Arena, identifiers: Arena, - pub(crate) type_annotations: Arena, + pub(crate) parser_types: Arena, pub(crate) variables: Arena, pub(crate) definitions: Arena, pub(crate) statements: Arena, @@ -108,19 +106,20 @@ impl Heap { pragmas: Arena::new(), imports: Arena::new(), identifiers: Arena::new(), - type_annotations: Arena::new(), + parser_types: Arena::new(), variables: Arena::new(), definitions: Arena::new(), statements: Arena::new(), expressions: Arena::new(), } } - pub fn alloc_type_annotation( + pub fn alloc_parser_type( &mut self, - f: impl FnOnce(TypeAnnotationId) -> TypeAnnotation, - ) -> TypeAnnotationId { - self.type_annotations.alloc_with_id(|id| f(id)) + f: impl FnOnce(ParserTypeId) -> ParserType, + ) -> ParserTypeId { + self.parser_types.alloc_with_id(|id| f(id)) } + pub fn alloc_parameter(&mut self, f: impl FnOnce(ParameterId) -> Parameter) -> ParameterId { ParameterId( self.variables.alloc_with_id(|id| Variable::Parameter(f(ParameterId(id)))), @@ -469,6 +468,13 @@ impl IndexMut for Heap { } } +impl Index for Heap { + type Output = ParserType; + fn index(&self, index: ParserTypeId) -> &Self::Output { + &self.parser_types[index.index] + } +} + impl Index for Heap { type Output = TypeAnnotation; fn index(&self, index: TypeAnnotationId) -> &Self::Output { @@ -1028,6 +1034,76 @@ impl Display for Identifier { } } +/// TODO: @cleanup Maybe handle this differently, preallocate in heap? The +/// reason I'm handling it like this now is so we don't allocate types in +/// the `Arena` structure if they're the common types defined here. 
+pub enum ParserTypeVariant { + // Basic builtin + Message, + Bool, + Byte, + Short, + Int, + Long, + String, + // Literals (need to get concrete builtin type during typechecking) + IntegerLiteral, + Inferred, + // Complex builtins + Array(ParserTypeId), // array of a type + Input(ParserTypeId), // typed input endpoint of a channel + Output(ParserTypeId), // typed output endpoint of a channel + Symbolic(SymbolicParserType), // symbolic type (definition or polyarg) +} + +impl ParserTypeVariant { + pub(crate) fn supports_polymorphic_args(&self) -> bool { + use ParserTypeVariant::*; + match ParserTypeVariant { + Message | Bool | Byte | Short | Int | Long | String | IntegerLiteral | Inferred => false, + _ => true + } + } +} + +/// ParserType is a specification of a type during the parsing phase and initial +/// linker/validator phase of the compilation process. These types may be +/// (partially) inferred or represent literals (e.g. a integer whose bytesize is +/// not yet determined). +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct ParserType { + pub this: ParserTypeId, + pub pos: InputPosition, + pub variant: ParserTypeVariant, +} + +/// SymbolicParserType is the specification of a symbolic type. During the +/// parsing phase we will only store the identifier of the type. During the +/// validation phase we will determine whether it refers to a user-defined type, +/// or a polymorphic argument. After the validation phase it may still be the +/// case that the resulting `variant` will not pass the typechecker. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct SymbolicParserType { + // Phase 1: parser + pub identifier: NamespacedIdentifier, + /// The user-specified polymorphic arguments. Zero-length implies that the + /// user did not specify any of them, and they're either not needed or all + /// need to be inferred. 
Otherwise the number of polymorphic arguments must + /// match those of the corresponding definition + pub poly_args: Vec, + // Phase 2: validation/linking + pub variant: Option +} + +/// Specifies whether the symbolic type points to an actual user-defined type, +/// or whether it points to a polymorphic argument within the definition (e.g. +/// a defined variable `T var` within a function `int func()` +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum SymbolicParserTypeVariant { + Definition(DefinitionId), + PolyArg((DefinitionId, u32)), // index of polyarg in the definition +} + #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum PrimitiveType { Input, @@ -1294,7 +1370,7 @@ pub struct Parameter { pub this: ParameterId, // Phase 1: parser pub position: InputPosition, - pub type_annotation: TypeAnnotationId, + pub parser_type: ParserTypeId, pub identifier: Identifier, } @@ -1309,7 +1385,7 @@ pub struct Local { pub this: LocalId, // Phase 1: parser pub position: InputPosition, - pub type_annotation: TypeAnnotationId, + pub parser_type: ParserTypeId, pub identifier: Identifier, // Phase 2: linker pub relative_pos_in_block: u32, @@ -1428,7 +1504,7 @@ impl VariableScope for Definition { pub struct StructFieldDefinition { pub position: InputPosition, pub field: Identifier, - pub the_type: TypeAnnotationId, + pub parser_type: ParserTypeId, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1437,6 +1513,7 @@ pub struct StructDefinition { // Phase 1: parser pub position: InputPosition, pub identifier: Identifier, + pub poly_vars: Vec, pub fields: Vec } @@ -1444,7 +1521,7 @@ pub struct StructDefinition { pub enum EnumVariantValue { None, Integer(i64), - Type(TypeAnnotationId), + Type(ParserTypeId), } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1460,6 +1537,7 @@ pub struct EnumDefinition { // Phase 1: parser pub position: InputPosition, pub identifier: Identifier, + pub poly_vars: 
Vec, pub variants: Vec, } @@ -1476,6 +1554,7 @@ pub struct Component { pub position: InputPosition, pub variant: ComponentVariant, pub identifier: Identifier, + pub poly_vars: Vec, pub parameters: Vec, pub body: StatementId, } @@ -1491,8 +1570,9 @@ pub struct Function { pub this: FunctionId, // Phase 1: parser pub position: InputPosition, - pub return_type: TypeAnnotationId, + pub return_type: ParserTypeId, pub identifier: Identifier, + pub poly_vars: Vec, pub parameters: Vec, pub body: StatementId, } @@ -1930,6 +2010,11 @@ impl SyntaxElement for MemoryStatement { } } +/// ChannelStatement is the declaration of an input and output port associated +/// with the same channel. Note that the polarity of the ports are from the +/// point of view of the component. So an output port is something that a +/// component uses to send data over (i.e. it is the "input end" of the +/// channel), and vice versa. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ChannelStatement { pub this: ChannelStatementId, diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index dd4cf28037a2be43a03cfdab7ab94740b8170131..1f9139daca0961a9871daf0ad625ca6bb45ff1f1 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -284,6 +284,7 @@ impl Lexer<'_> { || self.has_keyword(b"short") || self.has_keyword(b"int") || self.has_keyword(b"long") + || self.has_keyword(b"auto") } fn has_builtin_keyword(&self) -> bool { self.has_keyword(b"get") @@ -356,6 +357,231 @@ impl Lexer<'_> { } // Types and type annotations + + /// Consumes a type definition. When called the input position should be at + /// the type specification. When done the input position will be at the end + /// of the type specifications (hence may be at whitespace). 
+ fn consume_type2(&mut self, h: &mut Heap, allow_inference: bool) -> Result { + // Small helper function to convert in/out polymorphic arguments + let reduce_port_poly_args = | + heap: &mut Heap, + port_pos: &InputPosition, + args: Vec, + | { + match args.len() { + 0 => Ok(h.alloc_parser_type(|this| ParserType{ + this, + pos: port_pos.clone(), + variant: ParserTypeVariant::Inferred + })), + 1 => Ok(args[0]), + _ => Err(()) + } + }; + + // Consume the type + let pos = self.source.pos(); + let parser_type_variant = if self.has_keyword(b"msg") { + self.consume_keyword(b"msg"); + ParserTypeVariant::Message + } else if self.has_keyword(b"boolean") { + self.consume_keyword(b"boolean"); + ParserTypeVariant::Bool + } else if self.has_keyword(b"byte") { + self.consume_keyword(b"byte"); + ParserTypeVariant::Byte + } else if self.has_keyword(b"short") { + self.consume_keyword(b"short"); + ParserTypeVariant::Short + } else if self.has_keyword(b"int") { + self.consume_keyword(b"int"); + ParserTypeVariant::Int + } else if self.has_keyword(b"long") { + self.consume_keyword(b"long"); + ParserTypeVariant::Long + } else if self.has_keyword(b"str") { + self.consume_keyword(b"str"); + ParserTypeVariant::String + } else if self.has_keyword(b"auto") { + if !allow_inference { + return Err(ParseError2::new_error( + &self.source, pos, + "Type inference is not allowed here" + )); + } + + self.consume_keyword(b"auto"); + ParserTypeVariant::Inferred + } else if self.has_keyword(b"in") { + // TODO: @cleanup: not particularly neat to have this special case + // where we enforce polyargs in the parser-phase + self.consume_keyword(b"in"); + let poly_args = self.consume_polymorphic_args(h, allow_inference)?; + let poly_arg = reduce_port_poly_args(h, &pos, poly_args) + .map_err(|| ParseError2::new_error( + &self.source, pos, "'in' type only accepts up to 1 polymorphic argument" + ))?; + ParserTypeVariant::Input(poly_arg) + } else if self.has_keyword(b"out") { + self.consume_keyword(b"out"); + let 
poly_args = self.consume_polymorphic_args(h, allow_inference)?; + let poly_arg = reduce_port_poly_args(h, &pos, poly_args) + .map_err(|| ParseError2::new_error( + &self.source, pos, "'out' type only accepts up to 1 polymorphic argument" + ))?; + ParserTypeVariant::Output(poly_arg) + } else { + // Must be a symbolic type + let identifier = self.consume_namespaced_identifier()?; + let poly_args = self.consume_polymorphic_args(h, allow_inference)?; + ParserTypeVariant::Symbolic(SymbolicParserType{identifier, poly_args, variant: None}) + }; + + // If the type was a basic type (not supporting polymorphic type + // arguments), then we make sure the user did not specify any of them. + let mut backup_pos = self.source.pos(); + if !parser_type.supports_polymorphic_args() { + self.consume_whitespace(false)?; + if let Some(b'<') = self.source.next() { + return Err(ParseError2::new_error( + &self.source, self.source.pos(), + "This type does not allow polymorphic arguments" + )); + } + + self.source.seek(backup_pos); + } + + let mut parser_type_id = h.alloc_parser_type(|this| ParserType{ + this, pos, variant: parser_type_variant + }); + + // If we're dealing with arrays, then we need to wrap the currently + // parsed type in array types + self.consume_whitespace(false)?; + while let Some(b'[') = self.source.next() { + let pos = self.source.pos(); + self.source.consume(); + self.consume_whitespace(false)?; + if let Some(b']') = self.source.next() { + // Type is wrapped in an array + self.source.consume(); + parser_type_id = h.alloc_parser_type(|this| ParserType{ + this, pos, variant: ParserTypeVariant::Array(parser_type_id) + }); + backup_pos = self.source.pos(); + + // In case we're dealing with another array + self.consume_whitespace(false)?; + } else { + return Err(ParseError2::new_error( + &self.source, pos, + "Expected a closing ']'" + )); + } + } + + self.source.seek(backup_pos); + Ok(parser_type_id) + } + + /// Consumes polymorphic arguments and its delimiters if 
specified. The + /// input position may be at whitespace. If polyargs are present then the + /// whitespace and the args are consumed and the input position will be + /// placed after the polyarg list. If polyargs are not present then the + /// input position will remain unmodified and an empty vector will be + /// returned. + /// + /// Polymorphic arguments represent the specification of the parametric + /// types of a polymorphic type: they specify the value of the polymorphic + /// type's polymorphic variables. + fn consume_polymorphic_args(&mut self, h: &mut Heap, allow_inference: bool) -> Result, ParseError2> { + let backup_pos = self.source.pos(); + self.consume_whitespace(false)?; + if let Some(b'<') = self.source.next() { + // Has polymorphic args, at least one type must be specified + self.source.consume(); + self.consume_whitespace(false)?; + let mut poly_args = Vec::new(); + + loop { + // TODO: @cleanup, remove the no_more_types var + poly_args.push(self.consume_type2(h, allow_inference)?); + self.consume_whitespace(false)?; + + let has_comma = self.source.next() == Some(b','); + if has_comma { + // We might not actually be getting more types when the + // comma is at the end of the line, and we get a closing + // angular bracket on the next line. + self.source.consume(); + self.consume_whitespace(false)?; + } + + if let Some(b'>') = self.source.next() { + self.source.consume(); + break; + } else if !has_comma { + return Err(ParseError2::new_error( + &self.source, self.source.pos(), + "Expected the end of the polymorphic argument list" + )) + } + } + + Ok(poly_args) + } else { + // No polymorphic args + self.source.seek(backup_pos); + Ok(vec!()) + } + } + + /// Consumes polymorphic variables. These are identifiers that are used + /// within polymorphic types. The input position may be at whitespace. If + /// polymorphic variables are present then the whitespace, wrapping + /// delimiters and the polymorphic variables are consumed. 
Otherwise the + /// input position will stay where it is. If no polymorphic variables are + /// present then an empty vector will be returned. + fn consume_polymorphic_vars(&mut self) -> Result, ParseError2> { + let backup_pos = self.source.pos(); + self.consume_whitespace(false)?; + if let Some(b'<') = self.source.next() { + // Found the opening delimiter, we want at least one polyvar + self.source.consume(); + self.consume_whitespace(false)?; + let mut poly_vars = Vec::new(); + + loop { + poly_vars.push(self.consume_identifier()?); + self.consume_whitespace(false)?; + + let has_comma = self.source.next() == Some(b','); + if has_comma { + // We may get another variable + self.source.consume(); + self.consume_whitespace(false)?; + } + + if let Some(b'>') = self.source.next() { + self.source.consume(); + break; + } else if !has_comma { + return Err(ParseError2::new_error( + &self.source, self.source.pos(), + "Expected the end of the polymorphic variable list" + )) + } + } + + Ok(poly_vars) + } else { + // No polymorphic args + self.source.seek(backup_pos); + Ok(vec!()) + } + } + fn consume_primitive_type(&mut self) -> Result { if self.has_keyword(b"in") { self.consume_keyword(b"in")?; @@ -381,9 +607,9 @@ impl Lexer<'_> { } else if self.has_keyword(b"long") { self.consume_keyword(b"long")?; Ok(PrimitiveType::Long) - } else if self.has_keyword(b"let") { + } else if self.has_keyword(b"auto") { // TODO: @types - return Err(self.error_at_pos("inferred types using 'let' are reserved, but not yet implemented")); + return Err(self.error_at_pos("inferred types using 'auto' are reserved, but not yet implemented")); } else { let identifier = self.consume_namespaced_identifier()?; Ok(PrimitiveType::Symbolic(PrimitiveSymbolic{ @@ -439,12 +665,12 @@ impl Lexer<'_> { // Parameters fn consume_parameter(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let type_annotation = self.consume_type_annotation(h)?; + let parser_type = self.consume_type2(h, false)?; 
self.consume_whitespace(true)?; + let position = self.source.pos(); let identifier = self.consume_identifier()?; let id = - h.alloc_parameter(|this| Parameter { this, position, type_annotation, identifier }); + h.alloc_parameter(|this| Parameter { this, position, parser_type, identifier }); Ok(id) } fn consume_parameters( @@ -1350,44 +1576,68 @@ impl Lexer<'_> { &mut self, h: &mut Heap, ) -> Result { + // Consume channel statement and polymorphic argument if specified let position = self.source.pos(); self.consume_keyword(b"channel")?; + + let poly_args = self.consume_polymorphic_args(h, true)?; + let poly_arg_id = match poly_args.len() { + 0 => h.alloc_parser_type(|this| ParserType{ + this, pos: position.clone(), variant: ParserTypeVariant::Inferred, + }), + 1 => poly_args[0], + _ => return Err(ParseError2::new_error( + &self.source, self.source.pos(), + "port construction using 'channel' accepts up to 1 polymorphic argument" + )) + }; self.consume_whitespace(true)?; - let from_annotation = self.create_type_annotation_output(h)?; - let from_identifier = self.consume_identifier()?; + + // Consume the output port + let out_parser_type = h.alloc_parser_type(|this| ParserType{ + this, pos: position.clone(), variant: ParserTypeVariant::Output(poly_arg_id) + }); + let out_identifier = self.consume_identifier()?; + + // Consume the "->" syntax self.consume_whitespace(false)?; self.consume_string(b"->")?; self.consume_whitespace(false)?; - let to_annotation = self.create_type_annotation_input(h)?; - let to_identifier = self.consume_identifier()?; + + // Consume the input port + // TODO: Unsure about this, both ports refer to the same ParserType, is this ok? 
+ let in_parser_type = h.alloc_parser_type(|this| ParserType{ + this, pos: position.clone(), variant: ParserTypeVariant::Output(poly_arg_id) + }); + let in_identifier = self.consume_identifier()?; self.consume_whitespace(false)?; self.consume_string(b";")?; - let from = h.alloc_local(|this| Local { + let out_port = h.alloc_local(|this| Local { this, position, - type_annotation: from_annotation, - identifier: from_identifier, + parser_type: out_parser_type, + identifier: out_identifier, relative_pos_in_block: 0 }); - let to = h.alloc_local(|this| Local { + let in_port = h.alloc_local(|this| Local { this, position, - type_annotation: to_annotation, - identifier: to_identifier, + parser_type: in_parser_type, + identifier: in_identifier, relative_pos_in_block: 0 }); Ok(h.alloc_channel_statement(|this| ChannelStatement { this, position, - from, - to, + from: out_port, + to: in_port, relative_pos_in_block: 0, next: None, })) } fn consume_memory_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); - let type_annotation = self.consume_type_annotation(h)?; + let parser_type = self.consume_type2(h, true)?; self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; @@ -1397,7 +1647,7 @@ impl Lexer<'_> { let variable = h.alloc_local(|this| Local { this, position, - type_annotation, + parser_type, identifier, relative_pos_in_block: 0 }); @@ -1633,11 +1883,12 @@ impl Lexer<'_> { } } fn consume_struct_definition(&mut self, h: &mut Heap) -> Result { - // Parse "struct" keyword and its identifier + // Parse "struct" keyword, optional polyvars and its identifier let struct_pos = self.source.pos(); self.consume_keyword(b"struct")?; self.consume_whitespace(true)?; let struct_ident = self.consume_identifier()?; + let poly_vars = self.consume_polymorphic_vars()?; self.consume_whitespace(false)?; // Parse struct fields @@ -1653,7 +1904,7 @@ impl Lexer<'_> { // Consume field definition 
self.consume_whitespace(false)?; let field_position = self.source.pos(); - let field_type = self.consume_type_annotation(h)?; + let field_parser_type = self.consume_type2(h, false)?; self.consume_whitespace(true)?; let field_ident = self.consume_identifier()?; self.consume_whitespace(false)?; @@ -1661,7 +1912,7 @@ impl Lexer<'_> { fields.push(StructFieldDefinition{ position: field_position, field: field_ident, - the_type: field_type, + parser_type: field_parser_type, }); // If we have a comma, then we may or may not have another field @@ -1685,15 +1936,17 @@ impl Lexer<'_> { this, position: struct_pos, identifier: struct_ident, + poly_vars, fields, })) } fn consume_enum_definition(&mut self, h: &mut Heap) -> Result { - // Parse "enum" keyword and its identifier + // Parse "enum" keyword, optional polyvars and its identifier let enum_pos = self.source.pos(); self.consume_keyword(b"enum")?; self.consume_whitespace(true)?; let enum_ident = self.consume_identifier()?; + let poly_vars = self.consume_polymorphic_vars()?; self.consume_whitespace(false)?; // Parse enum variants @@ -1730,7 +1983,7 @@ impl Lexer<'_> { } else if let Some(b'(') = next { self.source.consume(); self.consume_whitespace(false)?; - let variant_type = self.consume_type_annotation(h)?; + let variant_type = self.consume_type2(h, false)?; self.consume_whitespace(false)?; self.consume_string(b")")?; self.consume_whitespace(false)?; @@ -1769,6 +2022,7 @@ impl Lexer<'_> { this, position: enum_pos, identifier: enum_ident, + poly_vars, variants, })) } @@ -1781,48 +2035,79 @@ impl Lexer<'_> { } } fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { + // Parse keyword, optional polyvars and the identifier let position = self.source.pos(); self.consume_keyword(b"composite")?; self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; + let poly_vars = self.consume_polymorphic_vars()?; self.consume_whitespace(false)?; + + // Consume parameters let mut parameters = Vec::new(); 
self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; + + // Parse body let body = self.consume_block_statement(h)?; Ok(h.alloc_component(|this| Component { - this, variant: ComponentVariant::Composite, position, identifier, parameters, body + this, + variant: ComponentVariant::Composite, + position, + identifier, + poly_vars, + parameters, + body })) } fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { + // Consume keyword, optional polyvars and identifier let position = self.source.pos(); self.consume_keyword(b"primitive")?; self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; + let poly_vars = self.consume_polymorphic_vars()?; self.consume_whitespace(false)?; + + // Consume parameters let mut parameters = Vec::new(); self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; + + // Consume body let body = self.consume_block_statement(h)?; Ok(h.alloc_component(|this| Component { - this, variant: ComponentVariant::Primitive, position, identifier, parameters, body + this, + variant: ComponentVariant::Primitive, + position, + identifier, + poly_vars, + parameters, + body })) } fn consume_function_definition(&mut self, h: &mut Heap) -> Result { + // Consume return type, optional polyvars and identifier let position = self.source.pos(); let return_type = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; + let poly_vars = self.consume_polymorphic_vars()?; self.consume_whitespace(false)?; + + // Consume parameters let mut parameters = Vec::new(); self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; + + // Consume body let body = self.consume_block_statement(h)?; Ok(h.alloc_function(|this| Function { this, position, return_type, identifier, + poly_vars, parameters, body, })) diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 
96f187554c6f63d8e39d92ce52b4444891d61ac9..6ea199137f075936102043401f84da271468e207 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,6 +1,7 @@ mod depth_visitor; mod symbol_table; mod type_table; +mod type_table2; mod type_resolver; mod visitor; mod visitor_linker; diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index e194388b17347616e7e9cf1dd74957b2b71143a1..ed684488fbc1a30862e45d0ad78ec5753e8ecc46 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -23,8 +23,11 @@ pub(crate) enum Symbol { } pub(crate) struct SymbolValue { - // Position refers to the origin of the symbol definition (i.e. the module's - // RootId that is in the key being used to lookup this value) + // Position is the place where the symbol is introduced to a module (this + // position always corresponds to the module whose RootId is stored in the + // `SymbolKey` associated with this `SymbolValue`). For a definition this + // is the position where the symbol is defined, for an import this is the + // position of the import statement. pub(crate) position: InputPosition, pub(crate) symbol: Symbol, } diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index 5a451562852a60bea1170283f5fa32e25cfea099..7785670279273b406be984c3421516d1793d3a5a 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -76,6 +76,9 @@ pub struct UnionVariant { tag_value: i64, } +/// `UnionType` is the algebraic datatype (or sum type, or discriminated union). +/// A value is an element of the union, identified by its tag, and may contain +/// a single subtype. pub struct UnionType { variants: Vec, tag_representation: PrimitiveType @@ -272,6 +275,7 @@ impl TypeTable { // Check the definition to see if we're dealing with an // enum or a union. If we find any union variants then // we immediately check if the type is already resolved. 
+ assert!(definition.poly_vars.is_empty(), "Polyvars for enum not yet implemented"); let mut has_tag_values = None; let mut has_int_values = None; for variant in &definition.variants { @@ -388,6 +392,7 @@ impl TypeTable { Definition::Struct(definition) => { // Before we start allocating fields, make sure we can // actually resolve all of the field types + assert!(definition.poly_vars.is_empty(), "Polyvars for struct not yet implemented"); for field_definition in &definition.fields { let type_definition = &heap[field_definition.the_type]; let lookup_result = lookup_type_definition( @@ -419,6 +424,7 @@ impl TypeTable { }, Definition::Component(definition) => { // As always, ensure all parameter types are resolved + assert!(definition.poly_vars.is_empty(), "Polyvars for component not yet implemented"); for parameter_id in &definition.parameters { let parameter = &heap[*parameter_id]; let type_definition = &heap[parameter.type_annotation]; @@ -452,6 +458,7 @@ impl TypeTable { })); }, Definition::Function(definition) => { + assert!(definition.poly_vars.is_empty(), "Polyvars for function not yet implemented"); // Handle checking the return type let lookup_result = lookup_type_definition( heap, &table, symbols, module.root_id, diff --git a/src/protocol/parser/type_table2.rs b/src/protocol/parser/type_table2.rs new file mode 100644 index 0000000000000000000000000000000000000000..b7becfa5344a0ec3bb90aa05b7d2d01b40e945fa --- /dev/null +++ b/src/protocol/parser/type_table2.rs @@ -0,0 +1,964 @@ +/** +TypeTable + +Contains the type table: a datastructure that, when compilation succeeds, +contains a concrete type definition for each AST type definition. In general +terms the type table will go through the following phases during the compilation +process: + + 1. The base type definitions are resolved after the parser phase has + finished. This implies that the AST is fully constructed, but not yet + annotated. + 2. 
With the base type definitions resolved, the validation/linker phase will + use the type table (together with the symbol table) to disambiguate + terms (e.g. does an expression refer to a variable, an enum, a constant, + etc.) + 3. During the type checking/inference phase the type table is used to ensure + that the AST contains valid use of types in expressions and statements. + At the same time type inference will find concrete instantiations of + polymorphic types, these will be stored in the type table as monomorphed + instantiations of a generic type. + 4. After type checking and inference (and possibly when constructing byte + code) the type table will construct a type graph and solidify each + non-polymorphic type and monomorphed instantiations of polymorphic types + into concrete types. + +So a base type is defined by its (optionally polymorphic) representation in the +AST. A concrete type replaces all polymorphic arguments with inferred types. A +struct, enum or union may have polymorphic arguments but not actually be a +polymorphic type. This happens when the polymorphic arguments are not used in +the type definition itself. + +NOTE: for now a polymorphic definition of a function/component is illegal if the + polymorphic arguments are not used in the arguments/return type. It should + be legal, but we disallow it for now. + +TODO: Allow potentially cyclic datatypes and reject truly cyclic datatypes. +TODO: Allow for the full potential of polymorphism +TODO: Detect "true" polymorphism: for datatypes like structs/enum/unions this + is simple. For functions we need to check the entire body. Do it here? Or + do it somewhere else? +TODO: Do we want to check fn argument collision here, or in validation phase? 
+*/ + +use std::fmt::{Formatter, Result as FmtResult}; +use std::collections::HashMap; + +use crate::protocol::ast::*; +use crate::protocol::parser::symbol_table::{SymbolTable, Symbol}; +use crate::protocol::inputsource::*; +use crate::protocol::parser::*; +use crate::protocol::parser::type_table2::ResolveResult::Resolved; + +//------------------------------------------------------------------------------ +// Defined Types +//------------------------------------------------------------------------------ + +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum TypeClass { + Enum, + Union, + Struct, + Function, + Component +} + +impl TypeClass { + pub(crate) fn display_name(&self) -> &'static str { + match self { + TypeClass::Enum => "enum", + TypeClass::Union => "enum", + TypeClass::Struct => "struct", + TypeClass::Function => "function", + TypeClass::Component => "component", + } + } +} + +impl std::fmt::Display for TypeClass { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + write!(f, "{}", self.display_name()) + } +} + +/// Struct wrapping around a potentially polymorphic type. If the type does not +/// have any polymorphic arguments then it will not have any monomorphs and +/// `is_polymorph` will be set to `false`. A type with polymorphic arguments +/// only has `is_polymorph` set to `true` if the polymorphic arguments actually +/// appear in the types associated types (function return argument, struct +/// field, enum variant, etc.). Otherwise the polymorphic argument is just a +/// marker and does not influence the bytesize of the type. +pub struct DefinedType { + ast_definition: DefinitionId, + definition: DefinedTypeVariant, + poly_args: Vec, + is_polymorph: bool, + is_pointerlike: bool, + monomorphs: Vec, // TODO: ? 
+} + +pub enum DefinedTypeVariant { + Enum(EnumType), + Union(UnionType), + Struct(StructType), + Function(FunctionType), + Component(ComponentType) +} + +pub struct PolyArg { + identifier: Identifier, + is_phantom: bool, +} + +impl DefinedTypeVariant { + pub(crate) fn type_class(&self) -> TypeClass { + match self { + DefinedTypeVariant::Enum(_) => TypeClass::Enum, + DefinedTypeVariant::Union(_) => TypeClass::Union, + DefinedTypeVariant::Struct(_) => TypeClass::Struct, + DefinedTypeVariant::Function(_) => TypeClass::Function, + DefinedTypeVariant::Component(_) => TypeClass::Component + } + } +} + +/// `EnumType` is the classical C/C++ enum type. It has various variants with +/// an assigned integer value. The integer values may be user-defined, +/// compiler-defined, or a mix of the two. If a user assigns the same enum +/// value multiple times, we assume the user is an expert and we consider both +/// variants to be equal to one another. +pub struct EnumType { + variants: Vec, + representation: PrimitiveType, +} + +// TODO: Also support maximum u64 value +pub struct EnumVariant { + identifier: Identifier, + value: i64, +} + +/// `UnionType` is the algebraic datatype (or sum type, or discriminated union). +/// A value is an element of the union, identified by its tag, and may contain +/// a single subtype. 
+pub struct UnionType { + variants: Vec, + tag_representation: PrimitiveType +} + +pub struct UnionVariant { + identifier: Identifier, + parser_type: Option, + tag_value: i64, +} + +pub struct StructType { + fields: Vec, +} + +pub struct StructField { + identifier: Identifier, + parser_type: ParserTypeId, +} + +pub struct FunctionType { + return_type: ParserTypeId, + arguments: Vec +} + +pub struct ComponentType { + variant: ComponentVariant, + arguments: Vec +} + +pub struct FunctionArgument { + identifier: Identifier, + parser_type: ParserTypeId, +} + +//------------------------------------------------------------------------------ +// Type table +//------------------------------------------------------------------------------ + +// TODO: @cleanup Do I really need this, doesn't make the code that much cleaner +struct TypeIterator { + breadcrumbs: Vec<(RootId, DefinitionId)> +} + +impl TypeIterator { + fn new() -> Self { + Self{ breadcrumbs: Vec::with_capacity(32) } + } + + fn reset(&mut self, root_id: RootId, definition_id: DefinitionId) { + self.breadcrumbs.clear(); + self.breadcrumbs.push((root_id, definition_id)) + } + + fn push(&mut self, root_id: RootId, definition_id: DefinitionId) { + self.breadcrumbs.push((root_id, definition_id)); + } + + fn contains(&self, root_id: RootId, definition_id: DefinitionId) -> bool { + for (stored_root_id, stored_definition_id) in self.breadcrumbs.iter() { + if stored_root_id == root_id && stored_definition_id == definition_id { return true; } + } + + return false + } + + fn top(&self) -> Option<(RootId, DefinitionId)> { + self.breadcrumbs.last().map(|(r, d)| (*r, *d)) + } + + fn pop(&mut self) { + debug_assert!(!self.breadcrumbs.is_empty()); + self.breadcrumbs.pop(); + } +} + +#[derive(PartialEq, Eq)] +enum SpecifiedTypeVariant { + // No subtypes + Message, + Bool, + Byte, + Short, + Int, + Long, + String, + // Always one subtype + ArrayOf, + InputOf, + OutputOf, + // Variable number of subtypes, depending on the polymorphic 
arguments on + // the definition + InstanceOf(DefinitionId, usize) +} + +#[derive(Eq)] +struct SpecifiedType { + /// Definition ID, may not be enough as the type may be polymorphic + definition: DefinitionId, + /// The polymorphic types for the definition. These are encoded in a list, + /// which we interpret as the depth-first serialization of the type tree. + poly_vars: Vec +} + +impl PartialEq for SpecifiedType { + fn eq(&self, other: &Self) -> bool { + // Should point to same definition and have the same polyvars + if self.definition.index != other.definition.index { return false; } + if self.poly_vars.len() != other.poly_vars.len() { return false; } + for (my_var, other_var) in self.poly_vars.iter().zip(other.poly_vars.iter()) { + if my_var != other_var { return false; } + } + + return true + } +} + +impl SpecifiedType { + fn new_non_polymorph(definition: DefinitionId) -> Self { + Self{ definition, poly_vars: Vec::new() } + } + + fn new_polymorph(definition: DefinitionId, heap: &Heap, parser_type_id: ParserTypeId) -> Self { + // Serialize into concrete types + let mut poly_vars = Vec::new(); + Self::construct_poly_vars(&mut poly_vars, heap, parser_type_id); + Self{ definition, poly_vars } + } + + fn construct_poly_vars(poly_vars: &mut Vec, heap: &Heap, parser_type_id: ParserTypeId) { + // Depth-first construction of poly vars + let parser_type = &heap[parser_type_id]; + match &parser_type.variant { + ParserTypeVariant::Message => { poly_vars.push(SpecifiedTypeVariant::Message); }, + ParserTypeVariant::Bool => { poly_vars.push(SpecifiedTypeVariant::Bool); }, + ParserTypeVariant::Byte => { poly_vars.push(SpecifiedTypeVariant::Byte); }, + ParserTypeVariant::Short => { poly_vars.push(SpecifiedTypeVariant::Short); }, + ParserTypeVariant::Int => { poly_vars.push(SpecifiedTypeVariant::Int); }, + ParserTypeVariant::Long => { poly_vars.push(SpecifiedTypeVariant::Long); }, + ParserTypeVariant::String => { poly_vars.push(SpecifiedTypeVariant::String); }, + 
ParserTypeVariant::Array(subtype_id) => { + poly_vars.push(SpecifiedTypeVariant::ArrayOf); + Self::construct_poly_vars(poly_vars, heap, *subtype_id); + }, + ParserTypeVariant::Input(subtype_id) => { + poly_vars.push(SpecifiedTypeVariant::InputOf); + Self::construct_poly_vars(poly_vars, heap, *subtype_id); + }, + ParserTypeVariant::Output(subtype_id) => { + poly_vars.push(SpecifiedTypeVariant::OutputOf); + Self::construct_poly_vars(poly_vars, heap, *subtype_id); + }, + ParserTypeVariant::Symbolic(symbolic) => { + let definition_id = match symbolic.variant { + SymbolicParserTypeVariant::Definition(definition_id) => definition_id, + SymbolicParserTypeVariant::PolyArg(_) => { + // When construct entries in the type table, we no longer allow the + // unspecified types in the AST, we expect them to be fully inferred. + debug_assert!(false, "Encountered 'PolyArg' symbolic type. Expected fully inferred types"); + unreachable!(); + } + }; + + poly_vars.push(SpecifiedTypeVariant::InstanceOf(definition_id, symbolic.poly_args.len())); + for subtype_id in &symbolic.poly_args { + Self::construct_poly_vars(poly_vars, heap, *subtype_id); + } + }, + ParserTypeVariant::IntegerLiteral => { + debug_assert!(false, "Encountered 'IntegerLiteral' symbolic type. Expected fully inferred types"); + unreachable!(); + }, + ParserTypeVariant::Inferred => { + debug_assert!(false, "Encountered 'Inferred' symbolic type. Expected fully inferred types"); + unreachable!(); + } + } + } +} + +enum ResolveResult { + BuiltIn, + PolyArg, + Resolved((RootId, DefinitionId)), + Unresolved((RootId, DefinitionId)) +} + +pub(crate) struct TypeTable { + /// Lookup from AST DefinitionId to a defined type. Considering possible + /// polymorphs is done inside the `DefinedType` struct. + lookup: HashMap, + /// Iterator over (module, definition) tuples used as workspace to make sure + /// that each base definition of all a type's subtypes are resolved. 
+ iter: TypeIterator, +} + +pub(crate) struct TypeCtx<'a> { + symbols: &'a SymbolTable, + heap: &'a Heap, + modules: &'a [LexedModule] +} + +impl TypeTable { + /// Construct a new type table without any resolved types. Types will be + /// resolved on-demand. + pub(crate) fn new(ctx: &TypeCtx) -> Self { + // Use context to guess hashmap size + let reserve_size = ctx.heap.definitions.len(); + Self{ + lookup: HashMap::with_capacity(reserve_size), + iter: TypeIterator::new(), + } + } + + pub(crate) fn resolve_type(&mut self, ctx: &TypeCtx, specified_type: &SpecifiedType) { + // Make sure we're allowed to cast root_id to index into ctx.modules + if cfg!(debug_assertions) { + for (index, module) in ctx.modules.iter().enumerate() { + debug_assert_eq!(index, module.root_id.index as usize); + } + } + + // Check if we already have the definition + let definition = self.get_definition(definition_id); + + } + + /// This function will resolve just the basic definition of the type, it + /// will not handle any of the monomorphized instances of the type. 
+ fn resolve_base_definition(&mut self, ctx: &TypeCtx, definition_id: DefinitionId) -> &DefinedType { + // Check if we have already resolved the base definition + if let Some(definition) = self.lookup.get(&definition_id) { + return definition; + } + + let root_id = Self::find_root_id(ctx, definition_id); + self.iter.reset(root_id, definition_id); + + while let Some((root_id, definition_id)) = self.iter.top() { + // We have a type to resolve + let definition = &ctx.heap[definition_id]; + + let can_pop_breadcrumb = match definition { + Definition::Enum(definition) => self.resolve_base_enum_definition(ctx, root_id, definition), + Definition::Struct(definition) => self.resolve_base_struct_definition(ctx, root_id, definition), + Definition::Component(definition) => self.resolve_base_component_definition(ctx, root_id, definition), + Definition::Function(definition) => self.resolve_base_function_definition(ctx, root_id, definition), + }?; + + // Otherwise: `ingest_resolve_result` has pushed a new breadcrumb + // that we must follow before we can resolve the current type + if can_pop_breadcrumb { + self.iter.pop(); + } + } + + // We must have resolved the type + debug_assert!(self.lookup.contains_key(&definition_id), "base type not resolved"); + self.lookup.get(&definition_id).unwrap() + } + + /// Resolve the basic enum definition to an entry in the type table. It will + /// not instantiate any monomorphized instances of polymorphic enum + /// definitions. If a subtype has to be resolved first then this function + /// will return `false` after calling `ingest_resolve_result`. + fn resolve_base_enum_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &EnumDefinition) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base enum already resolved"); + + // Check if the enum should be implemented as a classic enumeration or + // a tagged union. Keep track of variant index for error messages. 
Make + // sure all embedded types are resolved. + let mut first_tag_value = None; + let mut first_int_value = None; + for variant in &definition.variants { + match &variant.value { + EnumVariantValue::None => {}, + EnumVariantValue::Integer(_) => if first_int_value.is_none() { + first_int_value = Some(variant.position); + }, + EnumVariantValue::Type(variant_type_id) => { + if first_tag_value.is_none() { + first_tag_value = Some(variant.position); + } + + // Check if the embedded type needs to be resolved + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *variant_type_id)?; + if !self.ingest_resolve_result(ctx, resolve_result) { + return Ok(false) + } + } + } + } + + if first_tag_value.is_some() && first_int_value.is_some() { + // Not illegal, but useless and probably a programmer mistake + let module_source = &ctx.modules[root_id.index as usize].source; + let tag_pos = first_tag_value.unwrap(); + let int_pos = first_int_value.unwrap(); + return Err( + ParseError2::new_error( + module_source, definition.position, + "Illegal combination of enum integer variant(s) and enum union variant(s)" + ) + .with_postfixed_info(module_source, int_pos, "Assigning an integer value here") + .with_postfixed_info(module_source, tag_pos, "Embedding a type in a union variant here") + ); + } + + // Enumeration is legal + if first_tag_value.is_some() { + // Implement as a tagged union + + // Determine the union variants + let mut tag_value = -1; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let parser_type = match &variant.value { + EnumVariantValue::None => { + None + }, + EnumVariantValue::Type(parser_type_id) => { + // Type should be resolvable, we checked this above + Some(*parser_type_id) + }, + EnumVariantValue::Integer(_) => { + debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); + unreachable!(); + } + }; + + 
variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + parser_type, + tag_value, + }) + } + + // Ensure union names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_args)?; + + // Insert base definition in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_definition: definition_id, + definition: DefinedTypeVariant::Union(UnionType{ + variants, + tag_representation: Self::enum_tag_type(-1, tag_value), + }), + poly_args: Vec::new(), + is_polymorph: self.poly_args_in_use.iter().any(|v| *v), + is_pointerlike: false, // TODO: @cyclic_types + monomorphs: Vec::new() + }); + } else { + // Implement as a regular enum + debug_assert!(self.poly_args_in_use.iter().all(|v| !*v)); + let mut enum_value = -1; + let mut min_enum_value = 0; + let mut max_enum_value = 0; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + enum_value += 1; + match &variant.value { + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); + }, + EnumVariantValue::Type(_) => { + debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); + unreachable!(); + } + } + if enum_value < min_enum_value { min_enum_value = enum_value; } + else if enum_value > max_enum_value { max_enum_value = enum_value; } + } + + // Ensure enum names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, 
root_id, &definition.poly_vars)?; + + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_definition: definition_id, + definition: DefinedTypeVariant::Enum(EnumType{ + variants, + representation: Self::enum_tag_type(min_enum_value, max_enum_value) + }), + poly_args: Vec::new(), + is_polymorph: false, + is_pointerlike: false, + monomorphs: Vec::new() + }); + } + + Ok(true) + } + + /// Resolves the basic struct definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic struct + /// definitions. + fn resolve_base_struct_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &StructDefinition) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base struct already resolved"); + + // Make sure all fields point to resolvable types + for field_definition in &definition.fields { + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, field_definition.parser_type)?; + if !self.ingest_resolve_result(ctx, resolve_result)? 
{ + return Ok(false) + } + } + + // All fields types are resolved, construct base type + let mut fields = Vec::with_capacity(definition.fields.len()); + for field_definition in &definition.fields { + fields.push(StructField{ + identifier: field_definition.field.clone(), + parser_type: field_definition.parser_type, + }) + } + + // And make sure no conflicts exist in field names and/or polymorphic args + self.check_identifier_collision( + ctx, root_id, &fields, |field| &field.identifier, "struct field" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Struct(StructType{ + fields, + }), + poly_args: Vec::new(), + is_polymorph: false, // TODO: @polymorphism + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); + + Ok(true) + } + + /// Resolves the basic function definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic function + /// definitions. + fn resolve_base_function_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Function) -> Result { + debug_assert!(!self.lookup.contains_key(&definition.this.upcast()), "base function already resolved"); + + // Check the return type + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, definition.return_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } + + // Check the argument types + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, param.parser_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? 
{ + return Ok(false) + } + } + + // Construct arguments to function + let mut arguments = Vec::with_capacity(definition.parameters.len()); + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + arguments.push(FunctionArgument{ + identifier: param.identifier.clone(), + parser_type: param.parser_type, + }) + } + + // Check conflict of argument and polyarg identifiers + self.check_identifier_collision( + ctx, root_id, &arguments, |arg| &arg.identifier, "function argument" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + // Construct entry in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Function(FunctionType{ + return_type: definition.return_type, + arguments, + }), + poly_args: Vec::new(), + is_polymorph: false, // TODO: @polymorphism + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); + + Ok(true) + } + + /// Resolves the basic component definition to an entry in the type table. + /// It will not instantiate any monomorphized instances of polymorphic + /// component definitions. + fn resolve_base_component_definition(&mut self, ctx: &TypeCtx, root_id: RootId, definition: &Component) -> Result { + debug_assert!(self.lookup.contains_key(&definition.this.upcast()), "base component already resolved"); + + // Check argument types + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + let resolve_result = self.resolve_base_parser_type( + ctx, &definition.poly_vars, root_id, param.parser_type + )?; + if !self.ingest_resolve_result(ctx, resolve_result)? 
{ + return Ok(false) + } + } + + // Construct argument types + let mut arguments = Vec::with_capacity(definition.parameters.len()); + for param_id in &definition.parameters { + let param = &ctx.heap[*param_id]; + arguments.push(FunctionArgument{ + identifier: param.identifier.clone(), + parser_type: param.parser_type + }) + } + + // Check conflict of argument and polyarg identifiers + self.check_identifier_collision( + ctx, root_id, &arguments, |arg| &arg.identifier, "component argument" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + // Construct entry in type table + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType{ + ast_definition: definition_id, + definition: DefinedTypeVariant::Component(ComponentType{ + variant: definition.variant, + arguments, + }), + poly_args: Vec::new(), + is_polymorph: false, // TODO: @polymorphism, + is_pointerlike: false, // TODO: @cyclic + monomorphs: Vec::new(), + }); + + Ok(true) + } + + /// Takes a ResolveResult and returns `true` if the caller can happily + /// continue resolving its current type, or `false` if the caller must break + /// resolving the current type and exit to the outer resolving loop. In the + /// latter case the `result` value was `ResolveResult::Unresolved`, implying + /// that the type must be resolved first. 
+ fn ingest_resolve_result(&mut self, ctx: &TypeCtx, result: ResolveResult) -> Result { + match result { + ResolveResult::BuiltIn | ResolveResult::PolyArg => Ok(true), + ResolveResult::Resolved(_) => Ok(true), + ResolveResult::Unresolved((root_id, definition_id)) => { + if self.iter.contains(root_id, definition_id) { + // Cyclic dependency encountered + // TODO: Allow this + let mut error = ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, ctx.heap[definition_id].position(), + "Evaluating this type definition results in a cyclic type" + ); + + for (breadcrumb_idx, (root_id, definition_id)) in self.iter.breadcrumbs.iter().enumerate() { + let msg = if breadcrumb_idx == 0 { + "The cycle started with this definition" + } else { + "Which depends on this definition" + }; + + error = error.with_postfixed_info( + &ctx.modules[root_id.index as usize].source, + ctx.heap[*definition_id].position(), msg + ); + } + + Err(error) + } else { + // Type is not yet resolved, so push IDs on iterator and + // continue the resolving loop + self.iter.push(root_id, definition_id); + Ok(false) + } + } + } + } + + /// Each type definition may consist of several subtypes (the type + /// associated with a union variant, or the type associated with a struct + /// field). This function goes through the subtype tree and determines if + /// each reference type is resolved or not (yet). + fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, poly_vars: &Vec, root_id: RootId, mut parser_type_id: ParserTypeId) -> Result { + use ParserTypeVariant as PTV; + + loop { + let parser_type = &ctx.heap[parser_type_id]; + match &parser_type.variant { + // Builtin types. An array is a builtin as it is implemented as a + // couple of pointers, so we do not require the subtype to be fully + // resolved. 
+ PTV::Array(_) | + PTV::Message | + PTV::Bool | + PTV::Byte | + PTV::Short | + PTV::Int | + PTV::Long | + PTV::String => { + return Ok(ResolveResult::BuiltIn); + }, + // IntegerLiteral types and the inferred marker are not allowed in + // definitions of types + PTV::IntegerLiteral | + PTV::Inferred => { + debug_assert!(false, "Encountered illegal ParserTypeVariant within type definition"); + unreachable!(); + }, + // Input/Output port + // TODO: @unclear Come back to this, do these need to be fully + // resolved? Or do we implement these as the port identifiers? + PTV::Input(subtype_id) | + PTV::Output(subtype_id) => { + parser_type_id = *subtype_id; + }, + PTV::Symbolic(symbolic) => { + // Check if the symbolic type is one of the definition's + // polymorphic arguments. If so then we can halt the + // execution + for poly_arg in poly_vars.iter() { + if poly_arg.value == symbolic.identifier.value { + return Ok(ResolveResult::PolyArg) + } + } + + // Lookup the definition in the symbol table + let symbol = ctx.symbols.resolve_namespaced_symbol(root_id, &symbolic.identifier); + if symbol.is_none() { + return Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Could not resolve type" + )) + } + + let (symbol_value, mut ident_iter) = symbol.unwrap(); + match symbol_value.symbol { + Symbol::Namespace(_) => { + // Reference to a namespace instead of a type + return if ident_iter.num_remaining() == 0 { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Expected a type, got a module name" + )) + } else { + let next_identifier = ident_iter.next().unwrap(); + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + &format!("Could not find symbol '{}' with this module", String::from_utf8_lossy(next_identifier)) + )) + } + }, + Symbol::Definition((root_id, definition_id)) => { + let definition = 
&ctx.heap[definition_id]; + if ident_iter.num_remaining > 0 { + // Namespaced identifier is longer than the type + // we found. Return the appropriate message + return if definition.is_struct() || definition.is_enum() { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + &format!( + "Unknown type '{}', did you mean to use '{}'?", + String::from_utf8_lossy(&symbolic.identifier.value), + String::from_utf8_lossy(&definition.identifier().value) + ) + )) + } else { + Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Unknown type" + )) + } + } + + // Found a match, make sure it is a datatype + if !(definition.is_struct() || definition.is_enum()) { + return Err(ParseError2::new_error( + &ctx.modules[root_id.index as usize].source, symbolic.identifier.position, + "Embedded types must be datatypes (structs or enums)" + )) + } + + // Found a struct/enum definition + return if self.lookup.contains_key(&definition_id) { + Ok(ResolveResult::Resolved((root_id, definition_id))) + } else { + Ok(ResolveResult::Unresolved((root_id, definition_id))) + } + } + } + } + } + } + } + + /// Go through a list of identifiers and ensure that all identifiers have + /// unique names + fn check_identifier_collision &Identifier>( + &self, ctx: &TypeCtx, root_id: RootId, items: &[T], getter: F, item_name: &'static str + ) -> Result<(), ParseError2> { + for (item_idx, item) in items.iter().enumerate() { + let item_ident = getter(item); + for other_item in &items[0..item_idx] { + let other_item_ident = getter(other_item); + if item_ident.value == other_item_ident { + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, item_ident.position, &format!("This {} is defined more than once", item_name) + ).with_postfixed_info( + module_source, other_item_ident.position, &format!("The other {} is defined here", item_name) + )); + 
} + } + } + + Ok(()) + } + + /// Go through a list of polymorphic arguments and make sure that the + /// arguments all have unique names, and the arguments do not conflict with + /// any symbols defined at the module scope. + fn check_poly_args_collision( + &self, ctx: &TypeCtx, root_id: RootId, poly_args: &[Identifier] + ) -> Result<(), ParseError2> { + // Make sure polymorphic arguments are unique and none of the + // identifiers conflict with any imported scopes + for (arg_idx, poly_arg) in poly_args.iter().enumerate() { + for other_poly_arg in &poly_args[..arg_idx] { + if poly_arg.value == other_poly_arg.value { + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, poly_arg.position, + "This polymorphic argument is defined more than once" + ).with_postfixed_info( + module_source, other_poly_arg.position, + "It conflicts with this polymorphic argument" + )); + } + } + + // Check if identifier conflicts with a symbol defined or imported + // in the current module + if let Some(symbol) = ctx.symbols.resolve_symbol(root_id, &poly_arg.value) { + // We have a conflict + let module_source = &ctx.modules[root_id.index as usize].source; + return Err(ParseError2::new_error( + module_source, poly_arg.position, + "This polymorphic argument conflicts with another symbol" + ).with_postfixed_info( + module_source, symbol.position, + "It conflicts due to this symbol" + )); + } + } + + // All arguments are fine + Ok(()) + } + + //-------------------------------------------------------------------------- + // Small utilities + //-------------------------------------------------------------------------- + + fn enum_tag_type(min_tag_value: i64, max_tag_value: i64) -> PrimitiveType { + // TODO: @consistency tag values should be handled correctly + debug_assert!(min_tag_value < max_tag_value); + let abs_max_value = min_tag_value.abs().max(max_tag_value.abs()); + if abs_max_value <= u8::max_value() as i64 { + 
PrimitiveType::Byte + } else if abs_max_value <= u16::max_value() as i64 { + PrimitiveType::Short + } else if abs_max_value <= u32::max_value() as i64 { + PrimitiveType::Int + } else { + PrimitiveType::Long + } + } + + fn find_root_id(ctx: &TypeCtx, definition_id: DefinitionId) -> RootId { + // TODO: Keep in lookup or something + for module in ctx.modules { + let root_id = module.root_id; + let root = &ctx.heap[root_id]; + for module_definition_id in root.definitions.iter() { + if module_definition_id == definition_id { + return root_id + } + } + } + + debug_assert!(false, "DefinitionId without corresponding RootId"); + unreachable!(); + } +} \ No newline at end of file