diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 009e3f85c8eb904b8aae1a22cc1301fdb1b9329d..325ae99245a61d17d91e0be3e78dffce70c82ea1 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -91,6 +91,7 @@ define_new_ast_id!(LocalId, VariableId, Local, Variable::Local, variables); define_aliased_ast_id!(DefinitionId, Id, Definition, definitions); define_new_ast_id!(StructId, DefinitionId, StructDefinition, Definition::Struct, definitions); define_new_ast_id!(EnumId, DefinitionId, EnumDefinition, Definition::Enum, definitions); +define_new_ast_id!(UnionId, DefinitionId, UnionDefinition, Definition::Union, definitions); define_new_ast_id!(ComponentId, DefinitionId, Component, Definition::Component, definitions); define_new_ast_id!(FunctionId, DefinitionId, Function, Definition::Function, definitions); @@ -452,6 +453,11 @@ impl Heap { Definition::Enum(f(EnumId(id))) })) } + pub fn alloc_union_definition(&mut self, f: impl FnOnce(UnionId) -> UnionDefinition) -> UnionId { + UnionId(self.definitions.alloc_with_id(|id| { + Definition::Union(f(UnionId(id))) + })) + } pub fn alloc_component(&mut self, f: impl FnOnce(ComponentId) -> Component) -> ComponentId { ComponentId(self.definitions.alloc_with_id(|id| { Definition::Component(f(ComponentId(id))) @@ -1060,90 +1066,6 @@ impl Display for Type { } } -type LiteralCharacter = Vec; -type LiteralInteger = i64; // TODO: @int_literal - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Literal { - Null, // message - True, - False, - Character(LiteralCharacter), - Integer(LiteralInteger), - Struct(LiteralStruct), - Enum(LiteralEnum), -} - -impl Literal { - pub(crate) fn as_struct(&self) -> &LiteralStruct { - if let Literal::Struct(literal) = self{ - literal - } else { - unreachable!("Attempted to obtain {:?} as Literal::Struct", self) - } - } - - pub(crate) fn as_struct_mut(&mut self) -> &mut LiteralStruct { - if let Literal::Struct(literal) = self{ - literal - } else { - 
unreachable!("Attempted to obtain {:?} as Literal::Struct", self) - } - } - - pub(crate) fn as_enum(&self) -> &LiteralEnum { - if let Literal::Enum(literal) = self { - literal - } else { - unreachable!("Attempted to obtain {:?} as Literal::Enum", self) - } - } -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralStructField { - // Phase 1: parser - pub(crate) identifier: Identifier, - pub(crate) value: ExpressionId, - // Phase 2: linker - pub(crate) field_idx: usize, // in struct definition -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralStruct { - // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, - pub(crate) fields: Vec, - // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralEnum { - // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, - // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option, - pub(crate) variant_idx: usize, // as present in the type table -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Method { - Get, - Put, - Fires, - Create, - Symbolic(MethodSymbolic) -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct MethodSymbolic { - pub(crate) identifier: NamespacedIdentifier, - pub(crate) definition: Option -} - #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Field { Length, @@ -1305,6 +1227,7 @@ impl SyntaxElement for Local { pub enum Definition { Struct(StructDefinition), Enum(EnumDefinition), + Union(UnionDefinition), Component(Component), Function(Function), } @@ -1334,6 +1257,18 @@ impl Definition { _ => panic!("Unable to cast 'Definition' to 'EnumDefinition'"), } } + pub fn is_union(&self) -> bool { + match self { + 
Definition::Union(_) => true, + _ => false, + } + } + pub fn as_union(&self) -> &UnionDefinition { + match self { + Definition::Union(result) => result, + _ => panic!("Unable to cast 'Definition' to 'UnionDefinition'"), + } + } pub fn is_component(&self) -> bool { match self { Definition::Component(_) => true, @@ -1362,8 +1297,9 @@ impl Definition { match self { Definition::Struct(def) => &def.identifier, Definition::Enum(def) => &def.identifier, - Definition::Component(com) => &com.identifier, - Definition::Function(fun) => &fun.identifier, + Definition::Union(def) => &def.identifier, + Definition::Component(def) => &def.identifier, + Definition::Function(def) => &def.identifier, } } pub fn parameters(&self) -> &Vec { @@ -1390,6 +1326,7 @@ impl SyntaxElement for Definition { match self { Definition::Struct(def) => def.position, Definition::Enum(def) => def.position, + Definition::Union(def) => def.position, Definition::Component(def) => def.position(), Definition::Function(def) => def.position(), } @@ -1428,11 +1365,10 @@ pub struct StructDefinition { pub fields: Vec } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum EnumVariantValue { None, Integer(i64), - Type(ParserTypeId), } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1452,6 +1388,29 @@ pub struct EnumDefinition { pub variants: Vec, } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum UnionVariantValue { + None, + Embedded(Vec), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct UnionVariantDefinition { + pub position: InputPosition, + pub identifier: Identifier, + pub value: UnionVariantValue, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct UnionDefinition { + pub this: UnionId, + // Phase 1: parser + pub position: InputPosition, + pub identifier: Identifier, + pub poly_vars: Vec, + pub variants: Vec, +} + 
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub enum ComponentVariant { Primitive, @@ -2533,6 +2492,10 @@ impl SyntaxElement for ArrayExpression { } } +// TODO: @tokenizer Symbolic function calls are ambiguous with union literals +// that accept embedded values (although the polymorphic arguments are placed +// differently). To prevent double work we parse as CallExpression, and during +// validation we may transform the expression into a union literal. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct CallExpression { pub this: CallExpressionId, @@ -2540,7 +2503,7 @@ pub struct CallExpression { pub position: InputPosition, pub method: Method, pub arguments: Vec, - pub poly_args: Vec, + pub poly_args: Vec, // if symbolic will be determined during validation phase // Phase 2: linker pub parent: ExpressionParent, // Phase 3: type checking @@ -2553,6 +2516,21 @@ impl SyntaxElement for CallExpression { } } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum Method { + Get, + Put, + Fires, + Create, + Symbolic(MethodSymbolic) +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct MethodSymbolic { + pub(crate) identifier: NamespacedIdentifier, + pub(crate) definition: Option +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralExpression { pub this: LiteralExpressionId, @@ -2571,6 +2549,99 @@ impl SyntaxElement for LiteralExpression { } } +type LiteralCharacter = Vec; +type LiteralInteger = i64; // TODO: @int_literal + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum Literal { + Null, // message + True, + False, + Character(LiteralCharacter), + Integer(LiteralInteger), + Struct(LiteralStruct), + Enum(LiteralEnum), + Union(LiteralUnion), +} + +impl Literal { + pub(crate) fn as_struct(&self) -> &LiteralStruct { + if let Literal::Struct(literal) = self{ + literal + } else { + unreachable!("Attempted to obtain {:?} as 
Literal::Struct", self) + } + } + + pub(crate) fn as_struct_mut(&mut self) -> &mut LiteralStruct { + if let Literal::Struct(literal) = self{ + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Struct", self) + } + } + + pub(crate) fn as_enum(&self) -> &LiteralEnum { + if let Literal::Enum(literal) = self { + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Enum", self) + } + } + + pub(crate) fn as_union(&self) -> &LiteralUnion { + if let Literal::Union(literal) = self { + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Union", self) + } + } +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStructField { + // Phase 1: parser + pub(crate) identifier: Identifier, + pub(crate) value: ExpressionId, + // Phase 2: linker + pub(crate) field_idx: usize, // in struct definition +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStruct { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + pub(crate) fields: Vec, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option +} + +// TODO: @tokenizer Enum literals are ambiguous with union literals that do not +// accept embedded values. To prevent double work for now we parse as a +// LiteralEnum, and during validation we may transform the expression into a +// union literal. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralEnum { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option, + pub(crate) variant_idx: usize, // as present in the type table +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralUnion { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + pub(crate) values: Vec, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option, + pub(crate) variant_idx: usize, // as present in type table +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct VariableExpression { pub this: VariableExpressionId, diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index 92c14d0d7f752d2ac9511911219cdf90d381aee4..4e56ceb4b25452f95d30e5868fcf1dc7daecbbd1 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -16,6 +16,7 @@ const PREFIX_LOCAL_ID: &'static str = "Loc "; const PREFIX_DEFINITION_ID: &'static str = "Def "; const PREFIX_STRUCT_ID: &'static str = "DefS"; const PREFIX_ENUM_ID: &'static str = "DefE"; +const PREFIX_UNION_ID: &'static str = "DefU"; const PREFIX_COMPONENT_ID: &'static str = "DefC"; const PREFIX_FUNCTION_ID: &'static str = "DefF"; const PREFIX_STMT_ID: &'static str = "Stmt"; @@ -311,11 +312,38 @@ impl ASTWriter { match &variant.value { EnumVariantValue::None => variant_value.with_s_val("None"), EnumVariantValue::Integer(value) => variant_value.with_disp_val(value), - EnumVariantValue::Type(parser_type_id) => variant_value - .with_custom_val(|s| write_parser_type(s, heap, &heap[*parser_type_id])), }; } }, + Definition::Union(def) => { + self.kv(indent).with_id(PREFIX_UNION_ID, def.this.0.index) + .with_s_key("DefinitionUnion"); + + 
self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + for poly_var_id in &def.poly_vars { + self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + } + + self.kv(indent2).with_s_key("Variants"); + for variant in &def.variants { + self.kv(indent3).with_s_key("Variant"); + self.kv(indent4).with_s_key("Name") + .with_ascii_val(&variant.identifier.value); + + match &variant.value { + UnionVariantValue::None => { + self.kv(indent4).with_s_key("Value").with_s_val("None"); + } + UnionVariantValue::Embedded(embedded) => { + self.kv(indent4).with_s_key("Values"); + for embedded in embedded { + self.kv(indent4+1).with_s_key("Value") + .with_custom_val(|v| write_parser_type(v, heap, &heap[*embedded])); + } + } + } + } + } Definition::Function(def) => { self.kv(indent).with_id(PREFIX_FUNCTION_ID, def.this.0.index) .with_s_key("DefinitionFunction"); @@ -368,6 +396,19 @@ impl ASTWriter { self.kv(indent2).with_s_key("ParserType").with_custom_val(|w| write_parser_type(w, heap, &heap[param.parser_type])); } + fn write_poly_args(&mut self, heap: &Heap, poly_args: &[ParserTypeId], indent: usize) { + if poly_args.is_empty() { + return + } + + let indent2 = indent + 1; + self.kv(indent).with_s_key("PolymorphicArguments"); + for poly_arg in poly_args { + self.kv(indent2).with_s_key("Argument") + .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); + } + } + fn write_stmt(&mut self, heap: &Heap, stmt_id: StatementId, indent: usize) { let stmt = &heap[stmt_id]; let indent2 = indent + 1; @@ -666,14 +707,10 @@ impl ASTWriter { val.with_s_val("Struct"); let indent4 = indent3 + 1; - // Polymorphic arguments - if !data.poly_args2.is_empty() { - self.kv(indent3).with_s_key("PolymorphicArguments"); - for poly_arg in &data.poly_args2 { - self.kv(indent4).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } - } + self.write_poly_args(heap, &data.poly_args2, indent3); + 
self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)); + }); for field in &data.fields { self.kv(indent3).with_s_key("Field"); @@ -685,15 +722,25 @@ impl ASTWriter { }, Literal::Enum(data) => { val.with_s_val("Enum"); - let indent4 = indent3 + 1; - // Polymorphic arguments - if !data.poly_args2.is_empty() { - self.kv(indent3).with_s_key("PolymorphicArguments"); - for poly_arg in &data.poly_args2 { - self.kv(indent4).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } + self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)) + }); + self.kv(indent3).with_s_key("VariantIdx").with_disp_val(&data.variant_idx); + }, + Literal::Union(data) => { + val.with_s_val("Union"); + let indent4 = indent3 + 1; + self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)); + }); + self.kv(indent3).with_s_key("VariantIdx").with_disp_val(&data.variant_idx); + + for value in &data.values { + self.kv(indent3).with_s_key("Value"); + self.write_expr(heap, *value, indent4); } } } @@ -722,14 +769,7 @@ impl ASTWriter { } } - // Polymorphic arguments - if !expr.poly_args.is_empty() { - self.kv(indent2).with_s_key("PolymorphicArguments"); - for poly_arg in &expr.poly_args { - self.kv(indent3).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } - } + self.write_poly_args(heap, &expr.poly_args, indent2); // Arguments self.kv(indent2).with_s_key("Arguments"); @@ -848,7 +888,7 @@ fn write_concrete_type(target: &mut String, heap: &Heap, def_id: DefinitionId, t // Marker points to polymorphic variable index let definition = &heap[def_id]; let poly_var_ident = match definition { - 
Definition::Struct(_) | Definition::Enum(_) => unreachable!(), + Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => unreachable!(), Definition::Function(definition) => &definition.poly_vars[*marker].value, Definition::Component(definition) => &definition.poly_vars[*marker].value, }; diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index a337ef11a732ee6b50a149b64e7d6ea2c77f31ac..90f093995931cd3c1dab2a90d8b5fd96906a4eb7 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -89,6 +89,7 @@ impl Value { Literal::Character(_data) => unimplemented!(), Literal::Struct(_data) => unimplemented!(), Literal::Enum(_data) => unimplemented!(), + Literal::Union(_data) => unimplemented!(), } } fn set(&mut self, index: &Value, value: &Value) -> Option { diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index bb7f61a98143ad974a9866dada82acfe546b8400..29ed1c92942d206de98b701953917367d665bd00 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2182,6 +2182,8 @@ impl Lexer<'_> { Ok(self.consume_struct_definition(h)?.upcast()) } else if self.has_keyword(b"enum") { Ok(self.consume_enum_definition(h)?.upcast()) + } else if self.has_keyword(b"union") { + Ok(self.consume_union_definition(h)?.upcast()) } else if self.has_keyword(b"composite") || self.has_keyword(b"primitive") { Ok(self.consume_component_definition(h)?.upcast()) } else { @@ -2262,21 +2264,12 @@ impl Lexer<'_> { let value = lexer.consume_integer()?; EnumVariantValue::Integer(value) }, - Some(b'(') => { - // Embedded type - lexer.source.consume(); - lexer.consume_whitespace(false)?; - let embedded_type = lexer.consume_type(heap, false)?; - lexer.consume_whitespace(false)?; - lexer.consume_string(b")")?; - EnumVariantValue::Type(embedded_type) - }, Some(b'}') => { // End of enum EnumVariantValue::None } _ => { - return Err(lexer.error_at_pos("Expected ',', '=', '}' or '('")); + return Err(lexer.error_at_pos("Expected ',', '}' or '='")); } }; @@ -2298,6 +2291,69 @@ impl 
Lexer<'_> { variants, })) } + fn consume_union_definition(&mut self, h: &mut Heap) -> Result { + // Parse "union" keyword, optional polyvars and the identifier + let union_pos = self.source.pos(); + self.consume_keyword(b"union")?; + self.consume_whitespace(true)?; + let union_ident = self.consume_identifier()?; + self.consume_whitespace(false)?; + let poly_vars = self.consume_polymorphic_vars(h)?; + self.consume_whitespace(false)?; + + let variants = match self.consume_comma_separated( + h, b'{', b'}', "Expected end of union variant list", + |lexer, heap| { + // Variant identifier + let position = lexer.source.pos(); + let identifier = lexer.consume_identifier()?; + lexer.consume_whitespace(false)?; + + // Optional variant value + let next = lexer.source.next(); + let value = match next { + Some(b',') | Some(b'}') => { + // Continue parsing using `consume_comma_separated` + UnionVariantValue::None + }, + Some(b'(') => { + // Embedded type(s) + let embedded = lexer.consume_comma_separated( + heap, b'(', b')', "Expected end of embedded type list of union variant", + |lexer, heap| { + lexer.consume_type(heap, false) + } + )?.unwrap(); + + if embedded.is_empty() { + return Err(lexer.error_at_pos("Expected at least one embedded type")); + } + + UnionVariantValue::Embedded(embedded) + }, + _ => { + return Err(lexer.error_at_pos("Expected ',', '}' or '('")); + }, + }; + + Ok(UnionVariantDefinition{ position, identifier, value }) + } + )? 
{ + Some(variants) => variants, + None => return Err(ParseError::new_error( + self.source, union_pos, + "A union definition must be followed by its variants" + )), + }; + + Ok(h.alloc_union_definition(|this| UnionDefinition{ + this, + position: union_pos, + identifier: union_ident, + poly_vars, + variants, + })) + } fn consume_component_definition(&mut self, h: &mut Heap) -> Result { // TODO: Cleanup if self.has_keyword(b"composite") { diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index 06672185b15a041039576e7ca635448c7753fae2..8189cf0cfdf0fb1b5611e57c97a33e51985b3500 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -26,6 +26,9 @@ pub(crate) trait Visitor: Sized { fn visit_enum_definition(&mut self, _h: &mut Heap, _def: EnumId) -> VisitorResult { Ok(()) } + fn visit_union_definition(&mut self, _h: &mut Heap, _def: UnionId) -> VisitorResult { + Ok(()) + } fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { recursive_component_definition(self, h, def) } @@ -241,6 +244,7 @@ fn recursive_symbol_definition( match h[def].clone() { Definition::Struct(def) => this.visit_struct_definition(h, def.this), Definition::Enum(def) => this.visit_enum_definition(h, def.this), + Definition::Union(def) => this.visit_union_definition(h, def.this), Definition::Component(cdef) => this.visit_component_definition(h, cdef.this), Definition::Function(fdef) => this.visit_function_definition(h, fdef.this), } diff --git a/src/protocol/parser/type_resolver.rs b/src/protocol/parser/type_resolver.rs index b381df2e8353835bc246adce4e239aecac4914a4..0bc1ba54cc3f44f4c478753244ab72bddfe0d484 100644 --- a/src/protocol/parser/type_resolver.rs +++ b/src/protocol/parser/type_resolver.rs @@ -907,7 +907,7 @@ impl TypeResolvingVisitor { }) } }, - Definition::Enum(_) | Definition::Struct(_) => {}, + Definition::Enum(_) | Definition::Struct(_) | Definition::Union(_) => {}, } } } @@ 
-2792,8 +2792,8 @@ impl TypeResolvingVisitor { let return_type = self.determine_inference_type_from_parser_type(ctx, definition.return_type, false); (parameter_types, return_type) }, - Definition::Struct(_) | Definition::Enum(_) => { - unreachable!("insert initial polymorph data for struct/enum"); + Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => { + unreachable!("insert initial polymorph data for struct/enum/union"); } } } diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index 86cd109ebc04b44a443f725934a3f9b527a4ffc4..60d2c3886e4cc8327aaaf16ef7692c78468c322b 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -76,7 +76,7 @@ impl TypeClass { pub(crate) fn display_name(&self) -> &'static str { match self { TypeClass::Enum => "enum", - TypeClass::Union => "enum", + TypeClass::Union => "union", TypeClass::Struct => "struct", TypeClass::Function => "function", TypeClass::Component => "component", @@ -175,6 +175,13 @@ impl DefinedTypeVariant { _ => unreachable!("Cannot convert {} to enum variant", self.type_class()) } } + + pub(crate) fn as_union(&self) -> &UnionType { + match self { + DefinedTypeVariant::Union(v) => v, + _ => unreachable!("Cannot convert {} to union variant", self.type_class()) + } + } } /// `EnumType` is the classical C/C++ enum type. It has various variants with @@ -197,14 +204,14 @@ pub struct EnumVariant { /// A value is an element of the union, identified by its tag, and may contain /// a single subtype. 
pub struct UnionType { - variants: Vec, - tag_representation: PrimitiveType + pub(crate) variants: Vec, + pub(crate) tag_representation: PrimitiveType } pub struct UnionVariant { - identifier: Identifier, - parser_type: Option, - tag_value: i64, + pub(crate) identifier: Identifier, + pub(crate) embedded: Vec, // zero-length does not have embedded values + pub(crate) tag_value: i64, } pub struct StructType { @@ -401,6 +408,7 @@ impl TypeTable { let can_pop_breadcrumb = match definition { // TODO: @cleanup Borrow rules hax Definition::Enum(_) => self.resolve_base_enum_definition(ctx, root_id, definition_id), + Definition::Union(_) => self.resolve_base_union_definition(ctx, root_id, definition_id), Definition::Struct(_) => self.resolve_base_struct_definition(ctx, root_id, definition_id), Definition::Component(_) => self.resolve_base_component_definition(ctx, root_id, definition_id), Definition::Function(_) => self.resolve_base_function_definition(ctx, root_id, definition_id), @@ -428,157 +436,131 @@ impl TypeTable { let definition = ctx.heap[definition_id].as_enum(); - // Check if the enum should be implemented as a classic enumeration or - // a tagged union. Keep track of variant index for error messages. Make - // sure all embedded types are resolved. 
- let mut first_tag_value = None; - let mut first_int_value = None; + let mut enum_value = -1; + let mut min_enum_value = 0; + let mut max_enum_value = 0; + let mut variants = Vec::with_capacity(definition.variants.len()); for variant in &definition.variants { + enum_value += 1; match &variant.value { - EnumVariantValue::None => {}, - EnumVariantValue::Integer(_) => if first_int_value.is_none() { - first_int_value = Some(variant.position); + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); }, - EnumVariantValue::Type(variant_type_id) => { - if first_tag_value.is_none() { - first_tag_value = Some(variant.position); - } - - // Check if the embedded type needs to be resolved - let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *variant_type_id)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { - return Ok(false) - } + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); } } + if enum_value < min_enum_value { min_enum_value = enum_value; } + else if enum_value > max_enum_value { max_enum_value = enum_value; } } - if first_tag_value.is_some() && first_int_value.is_some() { - // Not illegal, but useless and probably a programmer mistake - let module_source = &ctx.modules[root_id.index as usize].source; - let tag_pos = first_tag_value.unwrap(); - let int_pos = first_int_value.unwrap(); - return Err( - ParseError::new_error( - module_source, definition.position, - "Illegal combination of enum integer variant(s) and enum union variant(s)" - ) - .with_postfixed_info(module_source, int_pos, "Assigning an integer value here") - .with_postfixed_info(module_source, tag_pos, "Embedding a type in a union variant here") - ); - } + // Ensure enum names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, 
&variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - // Enumeration is legal - if first_tag_value.is_some() { - // Implement as a tagged union - - // Determine the union variants - let mut tag_value = -1; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - tag_value += 1; - let parser_type = match &variant.value { - EnumVariantValue::None => { - None - }, - EnumVariantValue::Type(parser_type_id) => { - // Type should be resolvable, we checked this above - Some(*parser_type_id) - }, - EnumVariantValue::Integer(_) => { - debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); - unreachable!(); - } - }; + // Note: although we cannot have embedded type dependent on the + // polymorphic variables, they might still be present as tokens + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_root: root_id, + ast_definition: definition_id, + definition: DefinedTypeVariant::Enum(EnumType{ + variants, + representation: Self::enum_tag_type(min_enum_value, max_enum_value) + }), + poly_vars: self.create_initial_poly_vars(&definition.poly_vars), + is_polymorph: false, + is_pointerlike: false, + monomorphs: Vec::new() + }); - variants.push(UnionVariant{ - identifier: variant.identifier.clone(), - parser_type, - tag_value, - }) - } + Ok(true) + } - // Ensure union names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + /// Resolves the basic union definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic union + /// definitions. 
If a subtype has to be resolved first then this function + /// will return `false` after calling `ingest_resolve_result`. + fn resolve_base_union_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + debug_assert!(ctx.heap[definition_id].is_union()); + debug_assert!(!self.lookup.contains_key(&definition_id), "base union already resolved"); - let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars); - for variant in &variants { - if let Some(embedded) = variant.parser_type { - self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, embedded)?; - } - } - let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); - - // Insert base definition in type table - self.lookup.insert(definition_id, DefinedType { - ast_root: root_id, - ast_definition: definition_id, - definition: DefinedTypeVariant::Union(UnionType{ - variants, - tag_representation: Self::enum_tag_type(-1, tag_value), - }), - poly_vars: poly_args, - is_polymorph, - is_pointerlike: false, // TODO: @cyclic_types - monomorphs: Vec::new() - }); - } else { - // Implement as a regular enum - let mut enum_value = -1; - let mut min_enum_value = 0; - let mut max_enum_value = 0; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - enum_value += 1; - match &variant.value { - EnumVariantValue::None => { - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Integer(override_value) => { - enum_value = *override_value; - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Type(_) => { - debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); - unreachable!(); + let definition = ctx.heap[definition_id].as_union(); + + // Make sure all embedded types are resolved + for variant 
in &definition.variants { + match &variant.value { + UnionVariantValue::None => {}, + UnionVariantValue::Embedded(embedded) => { + for embedded_id in embedded { + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *embedded_id)?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } } } - if enum_value < min_enum_value { min_enum_value = enum_value; } - else if enum_value > max_enum_value { max_enum_value = enum_value; } } + } - // Ensure enum names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - // Note: although we cannot have embedded type dependent on the - // polymorphic variables, they might still be present as tokens - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType { - ast_root: root_id, - ast_definition: definition_id, - definition: DefinedTypeVariant::Enum(EnumType{ - variants, - representation: Self::enum_tag_type(min_enum_value, max_enum_value) - }), - poly_vars: self.create_initial_poly_vars(&definition.poly_vars), - is_polymorph: false, - is_pointerlike: false, - monomorphs: Vec::new() - }); + // If here then all embedded types are resolved + + // Determine the union variants + let mut tag_value = -1; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let embedded = match &variant.value { + UnionVariantValue::None => { Vec::new() }, + UnionVariantValue::Embedded(embedded) => { + // Type should be resolvable, we checked this above + embedded.clone() + }, + }; + + variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + embedded, + tag_value, + }) } + // Ensure union names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| 
&variant.identifier, "union variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars); + for variant in &variants { + for embedded_id in &variant.embedded { + self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, *embedded_id)?; + } + } + let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + + // Insert base definition in type table + self.lookup.insert(definition_id, DefinedType { + ast_root: root_id, + ast_definition: definition_id, + definition: DefinedTypeVariant::Union(UnionType{ + variants, + tag_representation: Self::enum_tag_type(-1, tag_value), + }), + poly_vars: poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic_types + monomorphs: Vec::new() + }); + Ok(true) } diff --git a/src/protocol/parser/visitor.rs b/src/protocol/parser/visitor.rs index 70e3267c52f0af14f06c7d0de954a24c6d231655..bcfb755fdbd0acbfaeeb24c4e3f221e3e2fbfd5c 100644 --- a/src/protocol/parser/visitor.rs +++ b/src/protocol/parser/visitor.rs @@ -54,6 +54,10 @@ pub(crate) trait Visitor2 { let def = def.this; self.visit_enum_definition(ctx, def) }, + Definition::Union(def) => { + let def = def.this; + self.visit_union_definition(ctx, def) + } Definition::Struct(def) => { let def = def.this; self.visit_struct_definition(ctx, def) @@ -71,6 +75,7 @@ pub(crate) trait Visitor2 { // --- enum variant handling fn visit_enum_definition(&mut self, _ctx: &mut Ctx, _id: EnumId) -> VisitorResult { Ok(()) } + fn visit_union_definition(&mut self, _ctx: &mut Ctx, _id: UnionId) -> VisitorResult{ Ok(()) } fn visit_struct_definition(&mut self, _ctx: &mut Ctx, _id: StructId) -> VisitorResult { Ok(()) } fn visit_component_definition(&mut self, _ctx: &mut Ctx, _id: ComponentId) -> VisitorResult { Ok(()) } fn visit_function_definition(&mut self, _ctx: &mut Ctx, _id: FunctionId) -> VisitorResult { Ok(()) } diff --git 
a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs index ffd76d988bd8abd78a9febacedd9de789746b035..19978255561c11bebdc20bb7a142129b9180f935 100644 --- a/src/protocol/parser/visitor_linker.rs +++ b/src/protocol/parser/visitor_linker.rs @@ -699,7 +699,6 @@ impl Visitor2 for ValidityAndLinkerVisitor { debug_assert!(!self.performing_breadth_pass); const FIELD_NOT_FOUND_SENTINEL: usize = usize::max_value(); - const VARIANT_NOT_FOUND_SENTINEL: usize = FIELD_NOT_FOUND_SENTINEL; let constant_expr = &mut ctx.heap[id]; let old_expr_parent = self.expr_parent; @@ -793,9 +792,42 @@ impl Visitor2 for ValidityAndLinkerVisitor { self.expression_buffer.truncate(old_num_exprs); }, Literal::Enum(literal) => { - let upcast_id = id.upcast(); + // TODO: @tokenizer, remove this horrible hack once we have a + // tokenizer and can distinguish types during AST-construction. + // For now see this horrible hack and weep! + let (symbol, _) = ctx.symbols.resolve_namespaced_identifier( + ctx.module.root_id, &literal.identifier + ); + if let Some(symbol) = symbol { + if let Symbol::Definition((_, definition_id)) = &symbol.symbol { + if let Some(defined_type) = ctx.types.get_base_definition(definition_id) { + if defined_type.definition.type_class() == TypeClass::Union { + // Transmute into union literal and call this function again + let old_identifier = literal.identifier.clone(); + let lit_expr = &ctx.heap[id]; + let old_position = lit_expr.position; + + ctx.heap[id] = LiteralExpression{ + this: id, + position: old_position, + value: Literal::Union(LiteralUnion{ + identifier: old_identifier, + values: vec!(), + poly_args2: Vec::new(), + definition: None, + variant_idx: 0, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }; + + return self.visit_literal_expr(ctx, id); + } + } + } + } - // Retrieve and set type of enumeration + // Retrieve and set definion of enumeration let (definition, ident_iter) = self.find_symbol_of_type_variant( 
&ctx.module.source, ctx.module.root_id, &ctx.symbols, &ctx.types, &literal.identifier, TypeClass::Enum @@ -805,29 +837,98 @@ impl Visitor2 for ValidityAndLinkerVisitor { // Make sure the variant exists let (variant_ident, _) = ident_iter.prev().unwrap(); let enum_definition = definition.definition.as_enum(); - literal.variant_idx = VARIANT_NOT_FOUND_SENTINEL; - for (variant_idx, variant) in enum_definition.variants.iter().enumerate() { - if variant.identifier.value == variant_ident { + match enum_definition.variants.iter().position(|variant| { + variant.identifier.value == variant_ident + }) { + Some(variant_idx) => { literal.variant_idx = variant_idx; - break; + }, + None => { + // Reborrow + let variant = String::from_utf8_lossy(variant_ident).to_string(); + let literal = ctx.heap[id].value.as_enum(); + let enum_definition = ctx.heap[definition.ast_definition].as_enum(); + return Err(ParseError::new_error( + &ctx.module.source, literal.identifier.position, + &format!( + "The variant '{}' does not exist on the enum '{}'", + &variant, &String::from_utf8_lossy(&enum_definition.identifier.value) + ) + )); } } - if literal.variant_idx == VARIANT_NOT_FOUND_SENTINEL { + self.visit_literal_poly_args(ctx, id)?; + }, + Literal::Union(literal) => { + let upcast_id = id.upcast(); + + // Retrieve and set definition of union + let (definition, ident_iter) = self.find_symbol_of_type_variant( + &ctx.module.source, ctx.module.root_id, &ctx.symbols, &ctx.types, + &literal.identifier, TypeClass::Union + )?; + literal.definition = Some(definition.ast_definition); + + // Make sure the variant exists + let (variant_ident, _) = ident_iter.prev().unwrap(); + let union_definition = definition.definition.as_union(); + + match union_definition.variants.iter().position(|variant| { + variant.identifier.value == variant_ident + }) { + Some(variant_idx) => { + literal.variant_idx = variant_idx; + }, + None => { + // Reborrow + let variant = String::from_utf8_lossy(variant_ident).to_string(); + 
let literal = ctx.heap[id].value.as_union(); + let union_definition = ctx.heap[definition.ast_definition].as_union(); + return Err(ParseError::new_error( + &ctx.module.source, literal.identifier.position, + &format!( + "The variant '{}' does not exist on the union '{}'", + &variant, &String::from_utf8_lossy(&union_definition.identifier.value) + ) + )); + } + } + + // Make sure the number of specified values matches the expected + // number of embedded values in the union variant. + let union_variant = &union_definition.variants[literal.variant_idx]; + if union_variant.embedded.len() != literal.values.len() { + // Reborrow let variant = String::from_utf8_lossy(variant_ident).to_string(); - let literal = ctx.heap[id].value.as_enum(); - let enum_definition = ctx.heap[definition.ast_definition].as_enum(); + let literal = ctx.heap[id].value.as_union(); + let union_definition = ctx.heap[definition.ast_definition].as_union(); return Err(ParseError::new_error( &ctx.module.source, literal.identifier.position, &format!( - "The variant '{}' does not exist on the enum '{}'", - &variant, &String::from_utf8_lossy(&enum_definition.identifier.value) - ) + "This variant '{}' of union '{}' expects {} embedded values, but {} were specified", + variant, &String::from_utf8_lossy(&union_definition.identifier.value), + union_variant.embedded.len(), literal.values.len() + ), )) } + // Traverse embedded values of union (if any) and evaluate the + // polymorphic arguments + let old_num_exprs = self.expression_buffer.len(); + self.expression_buffer.extend(&literal.values); + let new_num_exprs = self.expression_buffer.len(); + self.visit_literal_poly_args(ctx, id)?; + + for expr_idx in old_num_exprs..new_num_exprs { + let expr_id = self.expression_buffer[expr_idx]; + self.expr_parent = ExpressionParent::Expression(upcast_id, expr_idx as u32); + self.visit_expr(ctx, expr_id)?; + } + + self.expression_buffer.truncate(old_num_exprs); } } @@ -903,6 +1004,45 @@ impl Visitor2 for 
ValidityAndLinkerVisitor { TypeClass::Component } else { // Expect to find a function + // TODO: @tokenizer, remove this ambiguity when tokenizer is implemented. Hacked + // in here for now. + let (symbol, _) = ctx.symbols.resolve_namespaced_identifier( + ctx.module.root_id, &symbolic.identifier + ); + if let Some(symbol) = symbol { + if let Symbol::Definition((_, definition_id)) = symbol.symbol { + if let Some(defined_type) = ctx.types.get_base_definition(&definition_id) { + if defined_type.definition.type_class() == TypeClass::Union { + // Transmute into union literal and call the appropriate traverser + let call_expr = &ctx.heap[id]; + let old_position = call_expr.position.clone(); + let old_arguments = call_expr.arguments.clone(); + let old_identifier = match &call_expr.method { + Method::Symbolic(v) => v.identifier.clone(), + _ => unreachable!(), + }; + + let expr_id = id.upcast(); + let lit_id = LiteralExpressionId(expr_id); + ctx.heap[expr_id] = Expression::Literal(LiteralExpression{ + this: lit_id, + position: old_position, + value: Literal::Union(LiteralUnion{ + identifier: old_identifier, + values: old_arguments, + poly_args2: Vec::new(), + definition: None, + variant_idx: 0, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }); + + return self.visit_literal_expr(ctx, lit_id); + } + } + } + } TypeClass::Function }; @@ -1697,6 +1837,9 @@ impl ValidityAndLinkerVisitor { Literal::Enum(literal) => { literal.poly_args2.extend(&literal.identifier.poly_args); }, + Literal::Union(literal) => { + literal.poly_args2.extend(&literal.identifier.poly_args); + } _ => { debug_assert!(false, "called visit_literal_poly_args on a non-polymorphic literal"); unreachable!(); @@ -1732,7 +1875,16 @@ impl ValidityAndLinkerVisitor { defined_type, maybe_poly_args, literal.identifier.position ).as_parse_error(&ctx.heap, &ctx.module.source)?; - println!("DEBUG: poly args 2: {:?}", &literal.poly_args2); + (num_to_infer, &literal.poly_args2) + }, + 
Literal::Union(literal) => { + let defined_type = ctx.types.get_base_definition(literal.definition.as_ref().unwrap()) + .unwrap(); + let maybe_poly_args = literal.identifier.get_poly_args(); + let num_to_infer = match_polymorphic_args_to_vars( + defined_type, maybe_poly_args, literal.identifier.position + ).as_parse_error(&ctx.heap, &ctx.module.source)?; + (num_to_infer, &literal.poly_args2) } }; @@ -1756,6 +1908,7 @@ impl ValidityAndLinkerVisitor { let poly_args = match &mut ctx.heap[lit_id].value { Literal::Struct(literal) => &mut literal.poly_args2, Literal::Enum(literal) => &mut literal.poly_args2, + Literal::Union(literal) => &mut literal.poly_args2, _ => unreachable!(), }; poly_args.reserve(num_poly_args_to_infer); diff --git a/src/protocol/tests/parser_imports.rs b/src/protocol/tests/parser_imports.rs index bbcbe96e11f2c0dc989c38b046713f13fb0d8be2..52b113ab255654806943064cefea64731bd6ef0e 100644 --- a/src/protocol/tests/parser_imports.rs +++ b/src/protocol/tests/parser_imports.rs @@ -242,9 +242,4 @@ fn test_illegal_import_use() { .assert_msg_has(0, "Could not resolve this identifier") .assert_occurs_at(0, "mod2::Foo"); }); -} - -// TODO: Test incorrect imports: -// 1. importing a module -// 2. import something a module imports -// 3. import something that doesn't exist in a module \ No newline at end of file +} \ No newline at end of file