From 706db38f28491c7f9db987f02a37fcca29b23f45 2021-04-06 14:20:55 From: MH Date: 2021-04-06 14:20:55 Subject: [PATCH] Preparatory work for union literals Contains horrible parsing hacks that transmute function calls and enum literals to union literals if appropriate. Pending the implementation of the tokenizer the AST can be constructed more neatly. --- diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 009e3f85c8eb904b8aae1a22cc1301fdb1b9329d..325ae99245a61d17d91e0be3e78dffce70c82ea1 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -91,6 +91,7 @@ define_new_ast_id!(LocalId, VariableId, Local, Variable::Local, variables); define_aliased_ast_id!(DefinitionId, Id, Definition, definitions); define_new_ast_id!(StructId, DefinitionId, StructDefinition, Definition::Struct, definitions); define_new_ast_id!(EnumId, DefinitionId, EnumDefinition, Definition::Enum, definitions); +define_new_ast_id!(UnionId, DefinitionId, UnionDefinition, Definition::Union, definitions); define_new_ast_id!(ComponentId, DefinitionId, Component, Definition::Component, definitions); define_new_ast_id!(FunctionId, DefinitionId, Function, Definition::Function, definitions); @@ -452,6 +453,11 @@ impl Heap { Definition::Enum(f(EnumId(id))) })) } + pub fn alloc_union_definition(&mut self, f: impl FnOnce(UnionId) -> UnionDefinition) -> UnionId { + UnionId(self.definitions.alloc_with_id(|id| { + Definition::Union(f(UnionId(id))) + })) + } pub fn alloc_component(&mut self, f: impl FnOnce(ComponentId) -> Component) -> ComponentId { ComponentId(self.definitions.alloc_with_id(|id| { Definition::Component(f(ComponentId(id))) @@ -1060,90 +1066,6 @@ impl Display for Type { } } -type LiteralCharacter = Vec; -type LiteralInteger = i64; // TODO: @int_literal - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Literal { - Null, // message - True, - False, - Character(LiteralCharacter), - Integer(LiteralInteger), - Struct(LiteralStruct), - Enum(LiteralEnum), -} - -impl 
Literal { - pub(crate) fn as_struct(&self) -> &LiteralStruct { - if let Literal::Struct(literal) = self{ - literal - } else { - unreachable!("Attempted to obtain {:?} as Literal::Struct", self) - } - } - - pub(crate) fn as_struct_mut(&mut self) -> &mut LiteralStruct { - if let Literal::Struct(literal) = self{ - literal - } else { - unreachable!("Attempted to obtain {:?} as Literal::Struct", self) - } - } - - pub(crate) fn as_enum(&self) -> &LiteralEnum { - if let Literal::Enum(literal) = self { - literal - } else { - unreachable!("Attempted to obtain {:?} as Literal::Enum", self) - } - } -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralStructField { - // Phase 1: parser - pub(crate) identifier: Identifier, - pub(crate) value: ExpressionId, - // Phase 2: linker - pub(crate) field_idx: usize, // in struct definition -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralStruct { - // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, - pub(crate) fields: Vec, - // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct LiteralEnum { - // Phase 1: parser - pub(crate) identifier: NamespacedIdentifier, - // Phase 2: linker - pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition - pub(crate) definition: Option, - pub(crate) variant_idx: usize, // as present in the type table -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Method { - Get, - Put, - Fires, - Create, - Symbolic(MethodSymbolic) -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct MethodSymbolic { - pub(crate) identifier: NamespacedIdentifier, - pub(crate) definition: Option -} - #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Field { Length, @@ -1305,6 +1227,7 @@ impl 
SyntaxElement for Local { pub enum Definition { Struct(StructDefinition), Enum(EnumDefinition), + Union(UnionDefinition), Component(Component), Function(Function), } @@ -1334,6 +1257,18 @@ impl Definition { _ => panic!("Unable to cast 'Definition' to 'EnumDefinition'"), } } + pub fn is_union(&self) -> bool { + match self { + Definition::Union(_) => true, + _ => false, + } + } + pub fn as_union(&self) -> &UnionDefinition { + match self { + Definition::Union(result) => result, + _ => panic!("Unable to cast 'Definition' to 'UnionDefinition'"), + } + } pub fn is_component(&self) -> bool { match self { Definition::Component(_) => true, @@ -1362,8 +1297,9 @@ impl Definition { match self { Definition::Struct(def) => &def.identifier, Definition::Enum(def) => &def.identifier, - Definition::Component(com) => &com.identifier, - Definition::Function(fun) => &fun.identifier, + Definition::Union(def) => &def.identifier, + Definition::Component(def) => &def.identifier, + Definition::Function(def) => &def.identifier, } } pub fn parameters(&self) -> &Vec { @@ -1390,6 +1326,7 @@ impl SyntaxElement for Definition { match self { Definition::Struct(def) => def.position, Definition::Enum(def) => def.position, + Definition::Union(def) => def.position, Definition::Component(def) => def.position(), Definition::Function(def) => def.position(), } @@ -1428,11 +1365,10 @@ pub struct StructDefinition { pub fields: Vec } -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)] +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum EnumVariantValue { None, Integer(i64), - Type(ParserTypeId), } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1452,6 +1388,29 @@ pub struct EnumDefinition { pub variants: Vec, } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum UnionVariantValue { + None, + Embedded(Vec), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct UnionVariantDefinition { + pub position: 
InputPosition, + pub identifier: Identifier, + pub value: UnionVariantValue, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct UnionDefinition { + pub this: UnionId, + // Phase 1: parser + pub position: InputPosition, + pub identifier: Identifier, + pub poly_vars: Vec, + pub variants: Vec, +} + #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub enum ComponentVariant { Primitive, @@ -2533,6 +2492,10 @@ impl SyntaxElement for ArrayExpression { } } +// TODO: @tokenizer Symbolic function calls are ambiguous with union literals +// that accept embedded values (although the polymorphic arguments are placed +// differently). To prevent double work we parse as CallExpression, and during +// validation we may transform the expression into a union literal. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct CallExpression { pub this: CallExpressionId, @@ -2540,7 +2503,7 @@ pub struct CallExpression { pub position: InputPosition, pub method: Method, pub arguments: Vec, - pub poly_args: Vec, + pub poly_args: Vec, // if symbolic will be determined during validation phase // Phase 2: linker pub parent: ExpressionParent, // Phase 3: type checking @@ -2553,6 +2516,21 @@ impl SyntaxElement for CallExpression { } } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum Method { + Get, + Put, + Fires, + Create, + Symbolic(MethodSymbolic) +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct MethodSymbolic { + pub(crate) identifier: NamespacedIdentifier, + pub(crate) definition: Option +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct LiteralExpression { pub this: LiteralExpressionId, @@ -2571,6 +2549,99 @@ impl SyntaxElement for LiteralExpression { } } +type LiteralCharacter = Vec; +type LiteralInteger = i64; // TODO: @int_literal + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum Literal { + Null, // message + True, + 
False, + Character(LiteralCharacter), + Integer(LiteralInteger), + Struct(LiteralStruct), + Enum(LiteralEnum), + Union(LiteralUnion), +} + +impl Literal { + pub(crate) fn as_struct(&self) -> &LiteralStruct { + if let Literal::Struct(literal) = self{ + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Struct", self) + } + } + + pub(crate) fn as_struct_mut(&mut self) -> &mut LiteralStruct { + if let Literal::Struct(literal) = self{ + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Struct", self) + } + } + + pub(crate) fn as_enum(&self) -> &LiteralEnum { + if let Literal::Enum(literal) = self { + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Enum", self) + } + } + + pub(crate) fn as_union(&self) -> &LiteralUnion { + if let Literal::Union(literal) = self { + literal + } else { + unreachable!("Attempted to obtain {:?} as Literal::Union", self) + } + } +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStructField { + // Phase 1: parser + pub(crate) identifier: Identifier, + pub(crate) value: ExpressionId, + // Phase 2: linker + pub(crate) field_idx: usize, // in struct definition +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStruct { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + pub(crate) fields: Vec, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option +} + +// TODO: @tokenizer Enum literals are ambiguous with union literals that do not +// accept embedded values. To prevent double work for now we parse as a +// LiteralEnum, and during validation we may transform the expression into a +// union literal. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralEnum { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option, + pub(crate) variant_idx: usize, // as present in the type table +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralUnion { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + pub(crate) values: Vec, + // Phase 2: linker + pub(crate) poly_args2: Vec, // taken from identifier once linked to a definition + pub(crate) definition: Option, + pub(crate) variant_idx: usize, // as present in type table +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct VariableExpression { pub this: VariableExpressionId, diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index 92c14d0d7f752d2ac9511911219cdf90d381aee4..4e56ceb4b25452f95d30e5868fcf1dc7daecbbd1 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -16,6 +16,7 @@ const PREFIX_LOCAL_ID: &'static str = "Loc "; const PREFIX_DEFINITION_ID: &'static str = "Def "; const PREFIX_STRUCT_ID: &'static str = "DefS"; const PREFIX_ENUM_ID: &'static str = "DefE"; +const PREFIX_UNION_ID: &'static str = "DefU"; const PREFIX_COMPONENT_ID: &'static str = "DefC"; const PREFIX_FUNCTION_ID: &'static str = "DefF"; const PREFIX_STMT_ID: &'static str = "Stmt"; @@ -311,11 +312,38 @@ impl ASTWriter { match &variant.value { EnumVariantValue::None => variant_value.with_s_val("None"), EnumVariantValue::Integer(value) => variant_value.with_disp_val(value), - EnumVariantValue::Type(parser_type_id) => variant_value - .with_custom_val(|s| write_parser_type(s, heap, &heap[*parser_type_id])), }; } }, + Definition::Union(def) => { + self.kv(indent).with_id(PREFIX_UNION_ID, def.this.0.index) + .with_s_key("DefinitionUnion"); + + 
self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + for poly_var_id in &def.poly_vars { + self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + } + + self.kv(indent2).with_s_key("Variants"); + for variant in &def.variants { + self.kv(indent3).with_s_key("Variant"); + self.kv(indent4).with_s_key("Name") + .with_ascii_val(&variant.identifier.value); + + match &variant.value { + UnionVariantValue::None => { + self.kv(indent4).with_s_key("Value").with_s_val("None"); + } + UnionVariantValue::Embedded(embedded) => { + self.kv(indent4).with_s_key("Values"); + for embedded in embedded { + self.kv(indent4+1).with_s_key("Value") + .with_custom_val(|v| write_parser_type(v, heap, &heap[*embedded])); + } + } + } + } + } Definition::Function(def) => { self.kv(indent).with_id(PREFIX_FUNCTION_ID, def.this.0.index) .with_s_key("DefinitionFunction"); @@ -368,6 +396,19 @@ impl ASTWriter { self.kv(indent2).with_s_key("ParserType").with_custom_val(|w| write_parser_type(w, heap, &heap[param.parser_type])); } + fn write_poly_args(&mut self, heap: &Heap, poly_args: &[ParserTypeId], indent: usize) { + if poly_args.is_empty() { + return + } + + let indent2 = indent + 1; + self.kv(indent).with_s_key("PolymorphicArguments"); + for poly_arg in poly_args { + self.kv(indent2).with_s_key("Argument") + .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); + } + } + fn write_stmt(&mut self, heap: &Heap, stmt_id: StatementId, indent: usize) { let stmt = &heap[stmt_id]; let indent2 = indent + 1; @@ -666,14 +707,10 @@ impl ASTWriter { val.with_s_val("Struct"); let indent4 = indent3 + 1; - // Polymorphic arguments - if !data.poly_args2.is_empty() { - self.kv(indent3).with_s_key("PolymorphicArguments"); - for poly_arg in &data.poly_args2 { - self.kv(indent4).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } - } + self.write_poly_args(heap, &data.poly_args2, indent3); + 
self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)); + }); for field in &data.fields { self.kv(indent3).with_s_key("Field"); @@ -685,15 +722,25 @@ impl ASTWriter { }, Literal::Enum(data) => { val.with_s_val("Enum"); - let indent4 = indent3 + 1; - // Polymorphic arguments - if !data.poly_args2.is_empty() { - self.kv(indent3).with_s_key("PolymorphicArguments"); - for poly_arg in &data.poly_args2 { - self.kv(indent4).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } + self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)) + }); + self.kv(indent3).with_s_key("VariantIdx").with_disp_val(&data.variant_idx); + }, + Literal::Union(data) => { + val.with_s_val("Union"); + let indent4 = indent3 + 1; + self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { + write_option(s, data.definition.as_ref().map(|v| &v.index)); + }); + self.kv(indent3).with_s_key("VariantIdx").with_disp_val(&data.variant_idx); + + for value in &data.values { + self.kv(indent3).with_s_key("Value"); + self.write_expr(heap, *value, indent4); } } } @@ -722,14 +769,7 @@ impl ASTWriter { } } - // Polymorphic arguments - if !expr.poly_args.is_empty() { - self.kv(indent2).with_s_key("PolymorphicArguments"); - for poly_arg in &expr.poly_args { - self.kv(indent3).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } - } + self.write_poly_args(heap, &expr.poly_args, indent2); // Arguments self.kv(indent2).with_s_key("Arguments"); @@ -848,7 +888,7 @@ fn write_concrete_type(target: &mut String, heap: &Heap, def_id: DefinitionId, t // Marker points to polymorphic variable index let definition = &heap[def_id]; let poly_var_ident = match definition { - 
Definition::Struct(_) | Definition::Enum(_) => unreachable!(), + Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => unreachable!(), Definition::Function(definition) => &definition.poly_vars[*marker].value, Definition::Component(definition) => &definition.poly_vars[*marker].value, }; diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index a337ef11a732ee6b50a149b64e7d6ea2c77f31ac..90f093995931cd3c1dab2a90d8b5fd96906a4eb7 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -89,6 +89,7 @@ impl Value { Literal::Character(_data) => unimplemented!(), Literal::Struct(_data) => unimplemented!(), Literal::Enum(_data) => unimplemented!(), + Literal::Union(_data) => unimplemented!(), } } fn set(&mut self, index: &Value, value: &Value) -> Option { diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index bb7f61a98143ad974a9866dada82acfe546b8400..29ed1c92942d206de98b701953917367d665bd00 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -2182,6 +2182,8 @@ impl Lexer<'_> { Ok(self.consume_struct_definition(h)?.upcast()) } else if self.has_keyword(b"enum") { Ok(self.consume_enum_definition(h)?.upcast()) + } else if self.has_keyword(b"union") { + Ok(self.consume_union_definition(h)?.upcast()) } else if self.has_keyword(b"composite") || self.has_keyword(b"primitive") { Ok(self.consume_component_definition(h)?.upcast()) } else { @@ -2262,21 +2264,12 @@ impl Lexer<'_> { let value = lexer.consume_integer()?; EnumVariantValue::Integer(value) }, - Some(b'(') => { - // Embedded type - lexer.source.consume(); - lexer.consume_whitespace(false)?; - let embedded_type = lexer.consume_type(heap, false)?; - lexer.consume_whitespace(false)?; - lexer.consume_string(b")")?; - EnumVariantValue::Type(embedded_type) - }, Some(b'}') => { // End of enum EnumVariantValue::None } _ => { - return Err(lexer.error_at_pos("Expected ',', '=', '}' or '('")); + return Err(lexer.error_at_pos("Expected ',', '}' or '='")); } }; @@ -2298,6 +2291,69 @@ impl 
Lexer<'_> { variants, })) } + fn consume_union_definition(&mut self, h: &mut Heap) -> Result { + // Parse "union" keyword, optional polyvars and the identifier + let union_pos = self.source.pos(); + self.consume_keyword(b"union")?; + self.consume_whitespace(true)?; + let union_ident = self.consume_identifier()?; + self.consume_whitespace(false)?; + let poly_vars = self.consume_polymorphic_vars(h)?; + self.consume_whitespace(false)?; + + let variants = match self.consume_comma_separated( + h, b'{', b'}', "Expected end of union variant list", + |lexer, heap| { + // Variant identifier + let position = lexer.source.pos(); + let identifier = lexer.consume_identifier()?; + lexer.consume_whitespace(false)?; + + // Optional variant value + let next = lexer.source.next(); + let value = match next { + Some(b',') | Some(b'}') => { + // Continue parsing using `consume_comma_separated` + UnionVariantValue::None + }, + Some(b'(') => { + // Embedded type(s) + let embedded = lexer.consume_comma_separated( + heap, b'(', b')', "Expected end of embedded type list of union variant", + |lexer, heap| { + lexer.consume_type(heap, false) + } + )?.unwrap(); + + if embedded.is_empty() { + return Err(lexer.error_at_pos("Expected at least one embedded type")); + } + + UnionVariantValue::Embedded(embedded) + }, + _ => { + return Err(lexer.error_at_pos("Expected ',', '}' or '('")); + }, + }; + + Ok(UnionVariantDefinition{ position, identifier, value }) + } + )? 
{ + Some(variants) => variants, + None => return Err(ParseError::new_error( + self.source, union_pos, + "A union definition must be followed by its variants" + )), + }; + + Ok(h.alloc_union_definition(|this| UnionDefinition{ + this, + position: union_pos, + identifier: union_ident, + poly_vars, + variants, + })) + } fn consume_component_definition(&mut self, h: &mut Heap) -> Result { // TODO: Cleanup if self.has_keyword(b"composite") { diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index 06672185b15a041039576e7ca635448c7753fae2..8189cf0cfdf0fb1b5611e57c97a33e51985b3500 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -26,6 +26,9 @@ pub(crate) trait Visitor: Sized { fn visit_enum_definition(&mut self, _h: &mut Heap, _def: EnumId) -> VisitorResult { Ok(()) } + fn visit_union_definition(&mut self, _h: &mut Heap, _def: UnionId) -> VisitorResult { + Ok(()) + } fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { recursive_component_definition(self, h, def) } @@ -241,6 +244,7 @@ fn recursive_symbol_definition( match h[def].clone() { Definition::Struct(def) => this.visit_struct_definition(h, def.this), Definition::Enum(def) => this.visit_enum_definition(h, def.this), + Definition::Union(def) => this.visit_union_definition(h, def.this), Definition::Component(cdef) => this.visit_component_definition(h, cdef.this), Definition::Function(fdef) => this.visit_function_definition(h, fdef.this), } diff --git a/src/protocol/parser/type_resolver.rs b/src/protocol/parser/type_resolver.rs index b381df2e8353835bc246adce4e239aecac4914a4..0bc1ba54cc3f44f4c478753244ab72bddfe0d484 100644 --- a/src/protocol/parser/type_resolver.rs +++ b/src/protocol/parser/type_resolver.rs @@ -907,7 +907,7 @@ impl TypeResolvingVisitor { }) } }, - Definition::Enum(_) | Definition::Struct(_) => {}, + Definition::Enum(_) | Definition::Struct(_) | Definition::Union(_) => {}, } } } @@ 
-2792,8 +2792,8 @@ impl TypeResolvingVisitor { let return_type = self.determine_inference_type_from_parser_type(ctx, definition.return_type, false); (parameter_types, return_type) }, - Definition::Struct(_) | Definition::Enum(_) => { - unreachable!("insert initial polymorph data for struct/enum"); + Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => { + unreachable!("insert initial polymorph data for struct/enum/union"); } } } diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index 86cd109ebc04b44a443f725934a3f9b527a4ffc4..60d2c3886e4cc8327aaaf16ef7692c78468c322b 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -76,7 +76,7 @@ impl TypeClass { pub(crate) fn display_name(&self) -> &'static str { match self { TypeClass::Enum => "enum", - TypeClass::Union => "enum", + TypeClass::Union => "union", TypeClass::Struct => "struct", TypeClass::Function => "function", TypeClass::Component => "component", @@ -175,6 +175,13 @@ impl DefinedTypeVariant { _ => unreachable!("Cannot convert {} to enum variant", self.type_class()) } } + + pub(crate) fn as_union(&self) -> &UnionType { + match self { + DefinedTypeVariant::Union(v) => v, + _ => unreachable!("Cannot convert {} to union variant", self.type_class()) + } + } } /// `EnumType` is the classical C/C++ enum type. It has various variants with @@ -197,14 +204,14 @@ pub struct EnumVariant { /// A value is an element of the union, identified by its tag, and may contain /// a single subtype. 
pub struct UnionType { - variants: Vec, - tag_representation: PrimitiveType + pub(crate) variants: Vec, + pub(crate) tag_representation: PrimitiveType } pub struct UnionVariant { - identifier: Identifier, - parser_type: Option, - tag_value: i64, + pub(crate) identifier: Identifier, + pub(crate) embedded: Vec, // zero-length does not have embedded values + pub(crate) tag_value: i64, } pub struct StructType { @@ -401,6 +408,7 @@ impl TypeTable { let can_pop_breadcrumb = match definition { // TODO: @cleanup Borrow rules hax Definition::Enum(_) => self.resolve_base_enum_definition(ctx, root_id, definition_id), + Definition::Union(_) => self.resolve_base_union_definition(ctx, root_id, definition_id), Definition::Struct(_) => self.resolve_base_struct_definition(ctx, root_id, definition_id), Definition::Component(_) => self.resolve_base_component_definition(ctx, root_id, definition_id), Definition::Function(_) => self.resolve_base_function_definition(ctx, root_id, definition_id), @@ -428,157 +436,131 @@ impl TypeTable { let definition = ctx.heap[definition_id].as_enum(); - // Check if the enum should be implemented as a classic enumeration or - // a tagged union. Keep track of variant index for error messages. Make - // sure all embedded types are resolved. 
- let mut first_tag_value = None; - let mut first_int_value = None; + let mut enum_value = -1; + let mut min_enum_value = 0; + let mut max_enum_value = 0; + let mut variants = Vec::with_capacity(definition.variants.len()); for variant in &definition.variants { + enum_value += 1; match &variant.value { - EnumVariantValue::None => {}, - EnumVariantValue::Integer(_) => if first_int_value.is_none() { - first_int_value = Some(variant.position); + EnumVariantValue::None => { + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); }, - EnumVariantValue::Type(variant_type_id) => { - if first_tag_value.is_none() { - first_tag_value = Some(variant.position); - } - - // Check if the embedded type needs to be resolved - let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *variant_type_id)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { - return Ok(false) - } + EnumVariantValue::Integer(override_value) => { + enum_value = *override_value; + variants.push(EnumVariant{ + identifier: variant.identifier.clone(), + value: enum_value, + }); } } + if enum_value < min_enum_value { min_enum_value = enum_value; } + else if enum_value > max_enum_value { max_enum_value = enum_value; } } - if first_tag_value.is_some() && first_int_value.is_some() { - // Not illegal, but useless and probably a programmer mistake - let module_source = &ctx.modules[root_id.index as usize].source; - let tag_pos = first_tag_value.unwrap(); - let int_pos = first_int_value.unwrap(); - return Err( - ParseError::new_error( - module_source, definition.position, - "Illegal combination of enum integer variant(s) and enum union variant(s)" - ) - .with_postfixed_info(module_source, int_pos, "Assigning an integer value here") - .with_postfixed_info(module_source, tag_pos, "Embedding a type in a union variant here") - ); - } + // Ensure enum names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, 
&variants, |variant| &variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - // Enumeration is legal - if first_tag_value.is_some() { - // Implement as a tagged union - - // Determine the union variants - let mut tag_value = -1; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - tag_value += 1; - let parser_type = match &variant.value { - EnumVariantValue::None => { - None - }, - EnumVariantValue::Type(parser_type_id) => { - // Type should be resolvable, we checked this above - Some(*parser_type_id) - }, - EnumVariantValue::Integer(_) => { - debug_assert!(false, "Encountered `Integer` variant after asserting enum is a discriminated union"); - unreachable!(); - } - }; + // Note: although we cannot have embedded type dependent on the + // polymorphic variables, they might still be present as tokens + let definition_id = definition.this.upcast(); + self.lookup.insert(definition_id, DefinedType { + ast_root: root_id, + ast_definition: definition_id, + definition: DefinedTypeVariant::Enum(EnumType{ + variants, + representation: Self::enum_tag_type(min_enum_value, max_enum_value) + }), + poly_vars: self.create_initial_poly_vars(&definition.poly_vars), + is_polymorph: false, + is_pointerlike: false, + monomorphs: Vec::new() + }); - variants.push(UnionVariant{ - identifier: variant.identifier.clone(), - parser_type, - tag_value, - }) - } + Ok(true) + } - // Ensure union names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + /// Resolves the basic union definition to an entry in the type table. It + /// will not instantiate any monomorphized instances of polymorphic union + /// definitions.
If a subtype has to be resolved first then this function + /// will return `false` after calling `ingest_resolve_result`. + fn resolve_base_union_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + debug_assert!(ctx.heap[definition_id].is_union()); + debug_assert!(!self.lookup.contains_key(&definition_id), "base union already resolved"); - let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars); - for variant in &variants { - if let Some(embedded) = variant.parser_type { - self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, embedded)?; - } - } - let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); - - // Insert base definition in type table - self.lookup.insert(definition_id, DefinedType { - ast_root: root_id, - ast_definition: definition_id, - definition: DefinedTypeVariant::Union(UnionType{ - variants, - tag_representation: Self::enum_tag_type(-1, tag_value), - }), - poly_vars: poly_args, - is_polymorph, - is_pointerlike: false, // TODO: @cyclic_types - monomorphs: Vec::new() - }); - } else { - // Implement as a regular enum - let mut enum_value = -1; - let mut min_enum_value = 0; - let mut max_enum_value = 0; - let mut variants = Vec::with_capacity(definition.variants.len()); - for variant in &definition.variants { - enum_value += 1; - match &variant.value { - EnumVariantValue::None => { - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Integer(override_value) => { - enum_value = *override_value; - variants.push(EnumVariant{ - identifier: variant.identifier.clone(), - value: enum_value, - }); - }, - EnumVariantValue::Type(_) => { - debug_assert!(false, "Encountered `Type` variant after asserting enum is not a discriminated union"); - unreachable!(); + let definition = ctx.heap[definition_id].as_union(); + + // Make sure all embedded types are resolved + for variant 
in &definition.variants { + match &variant.value { + UnionVariantValue::None => {}, + UnionVariantValue::Embedded(embedded) => { + for embedded_id in embedded { + let resolve_result = self.resolve_base_parser_type(ctx, &definition.poly_vars, root_id, *embedded_id)?; + if !self.ingest_resolve_result(ctx, resolve_result)? { + return Ok(false) + } } } - if enum_value < min_enum_value { min_enum_value = enum_value; } - else if enum_value > max_enum_value { max_enum_value = enum_value; } } + } - // Ensure enum names and polymorphic args do not conflict - self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" - )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; - - // Note: although we cannot have embedded type dependent on the - // polymorphic variables, they might still be present as tokens - let definition_id = definition.this.upcast(); - self.lookup.insert(definition_id, DefinedType { - ast_root: root_id, - ast_definition: definition_id, - definition: DefinedTypeVariant::Enum(EnumType{ - variants, - representation: Self::enum_tag_type(min_enum_value, max_enum_value) - }), - poly_vars: self.create_initial_poly_vars(&definition.poly_vars), - is_polymorph: false, - is_pointerlike: false, - monomorphs: Vec::new() - }); + // If here then all embedded types are resolved + + // Determine the union variants + let mut tag_value = -1; + let mut variants = Vec::with_capacity(definition.variants.len()); + for variant in &definition.variants { + tag_value += 1; + let embedded = match &variant.value { + UnionVariantValue::None => { Vec::new() }, + UnionVariantValue::Embedded(embedded) => { + // Type should be resolvable, we checked this above + embedded.clone() + }, + }; + + variants.push(UnionVariant{ + identifier: variant.identifier.clone(), + embedded, + tag_value, + }) } + // Ensure union names and polymorphic args do not conflict + self.check_identifier_collision( + ctx, root_id, &variants, |variant| 
&variant.identifier, "enum variant" + )?; + self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + + let mut poly_args = self.create_initial_poly_vars(&definition.poly_vars); + for variant in &variants { + for embedded_id in &variant.embedded { + self.check_and_resolve_embedded_type_and_modify_poly_args(ctx, definition_id, &mut poly_args, root_id, *embedded_id)?; + } + } + let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + + // Insert base definition in type table + self.lookup.insert(definition_id, DefinedType { + ast_root: root_id, + ast_definition: definition_id, + definition: DefinedTypeVariant::Union(UnionType{ + variants, + tag_representation: Self::enum_tag_type(-1, tag_value), + }), + poly_vars: poly_args, + is_polymorph, + is_pointerlike: false, // TODO: @cyclic_types + monomorphs: Vec::new() + }); + Ok(true) } diff --git a/src/protocol/parser/visitor.rs b/src/protocol/parser/visitor.rs index 70e3267c52f0af14f06c7d0de954a24c6d231655..bcfb755fdbd0acbfaeeb24c4e3f221e3e2fbfd5c 100644 --- a/src/protocol/parser/visitor.rs +++ b/src/protocol/parser/visitor.rs @@ -54,6 +54,10 @@ pub(crate) trait Visitor2 { let def = def.this; self.visit_enum_definition(ctx, def) }, + Definition::Union(def) => { + let def = def.this; + self.visit_union_definition(ctx, def) + } Definition::Struct(def) => { let def = def.this; self.visit_struct_definition(ctx, def) @@ -71,6 +75,7 @@ pub(crate) trait Visitor2 { // --- enum variant handling fn visit_enum_definition(&mut self, _ctx: &mut Ctx, _id: EnumId) -> VisitorResult { Ok(()) } + fn visit_union_definition(&mut self, _ctx: &mut Ctx, _id: UnionId) -> VisitorResult{ Ok(()) } fn visit_struct_definition(&mut self, _ctx: &mut Ctx, _id: StructId) -> VisitorResult { Ok(()) } fn visit_component_definition(&mut self, _ctx: &mut Ctx, _id: ComponentId) -> VisitorResult { Ok(()) } fn visit_function_definition(&mut self, _ctx: &mut Ctx, _id: FunctionId) -> VisitorResult { Ok(()) } diff --git 
a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs index ffd76d988bd8abd78a9febacedd9de789746b035..19978255561c11bebdc20bb7a142129b9180f935 100644 --- a/src/protocol/parser/visitor_linker.rs +++ b/src/protocol/parser/visitor_linker.rs @@ -699,7 +699,6 @@ impl Visitor2 for ValidityAndLinkerVisitor { debug_assert!(!self.performing_breadth_pass); const FIELD_NOT_FOUND_SENTINEL: usize = usize::max_value(); - const VARIANT_NOT_FOUND_SENTINEL: usize = FIELD_NOT_FOUND_SENTINEL; let constant_expr = &mut ctx.heap[id]; let old_expr_parent = self.expr_parent; @@ -793,9 +792,42 @@ impl Visitor2 for ValidityAndLinkerVisitor { self.expression_buffer.truncate(old_num_exprs); }, Literal::Enum(literal) => { - let upcast_id = id.upcast(); + // TODO: @tokenizer, remove this horrible hack once we have a + // tokenizer and can distinguish types during AST-construction. + // For now see this horrible hack and weep! + let (symbol, _) = ctx.symbols.resolve_namespaced_identifier( + ctx.module.root_id, &literal.identifier + ); + if let Some(symbol) = symbol { + if let Symbol::Definition((_, definition_id)) = &symbol.symbol { + if let Some(defined_type) = ctx.types.get_base_definition(definition_id) { + if defined_type.definition.type_class() == TypeClass::Union { + // Transmute into union literal and call this function again + let old_identifier = literal.identifier.clone(); + let lit_expr = &ctx.heap[id]; + let old_position = lit_expr.position; + + ctx.heap[id] = LiteralExpression{ + this: id, + position: old_position, + value: Literal::Union(LiteralUnion{ + identifier: old_identifier, + values: vec!(), + poly_args2: Vec::new(), + definition: None, + variant_idx: 0, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }; + + return self.visit_literal_expr(ctx, id); + } + } + } + } - // Retrieve and set type of enumeration + // Retrieve and set definition of enumeration let (definition, ident_iter) = self.find_symbol_of_type_variant(
&ctx.module.source, ctx.module.root_id, &ctx.symbols, &ctx.types, &literal.identifier, TypeClass::Enum @@ -805,29 +837,98 @@ impl Visitor2 for ValidityAndLinkerVisitor { // Make sure the variant exists let (variant_ident, _) = ident_iter.prev().unwrap(); let enum_definition = definition.definition.as_enum(); - literal.variant_idx = VARIANT_NOT_FOUND_SENTINEL; - for (variant_idx, variant) in enum_definition.variants.iter().enumerate() { - if variant.identifier.value == variant_ident { + match enum_definition.variants.iter().position(|variant| { + variant.identifier.value == variant_ident + }) { + Some(variant_idx) => { literal.variant_idx = variant_idx; - break; + }, + None => { + // Reborrow + let variant = String::from_utf8_lossy(variant_ident).to_string(); + let literal = ctx.heap[id].value.as_enum(); + let enum_definition = ctx.heap[definition.ast_definition].as_enum(); + return Err(ParseError::new_error( + &ctx.module.source, literal.identifier.position, + &format!( + "The variant '{}' does not exist on the enum '{}'", + &variant, &String::from_utf8_lossy(&enum_definition.identifier.value) + ) + )); } } - if literal.variant_idx == VARIANT_NOT_FOUND_SENTINEL { + self.visit_literal_poly_args(ctx, id)?; + }, + Literal::Union(literal) => { + let upcast_id = id.upcast(); + + // Retrieve and set definition of union + let (definition, ident_iter) = self.find_symbol_of_type_variant( + &ctx.module.source, ctx.module.root_id, &ctx.symbols, &ctx.types, + &literal.identifier, TypeClass::Union + )?; + literal.definition = Some(definition.ast_definition); + + // Make sure the variant exists + let (variant_ident, _) = ident_iter.prev().unwrap(); + let union_definition = definition.definition.as_union(); + + match union_definition.variants.iter().position(|variant| { + variant.identifier.value == variant_ident + }) { + Some(variant_idx) => { + literal.variant_idx = variant_idx; + }, + None => { + // Reborrow + let variant = String::from_utf8_lossy(variant_ident).to_string(); + 
let literal = ctx.heap[id].value.as_union(); + let union_definition = ctx.heap[definition.ast_definition].as_union(); + return Err(ParseError::new_error( + &ctx.module.source, literal.identifier.position, + &format!( + "The variant '{}' does not exist on the union '{}'", + &variant, &String::from_utf8_lossy(&union_definition.identifier.value) + ) + )); + } + } + + // Make sure the number of specified values matches the expected + // number of embedded values in the union variant. + let union_variant = &union_definition.variants[literal.variant_idx]; + if union_variant.embedded.len() != literal.values.len() { + // Reborrow let variant = String::from_utf8_lossy(variant_ident).to_string(); - let literal = ctx.heap[id].value.as_enum(); - let enum_definition = ctx.heap[definition.ast_definition].as_enum(); + let literal = ctx.heap[id].value.as_union(); + let union_definition = ctx.heap[definition.ast_definition].as_union(); return Err(ParseError::new_error( &ctx.module.source, literal.identifier.position, &format!( - "The variant '{}' does not exist on the enum '{}'", - &variant, &String::from_utf8_lossy(&enum_definition.identifier.value) - ) + "This variant '{}' of union '{}' expects {} embedded values, but {} were specified", + variant, &String::from_utf8_lossy(&union_definition.identifier.value), + union_variant.embedded.len(), literal.values.len() + ), )) } + // Traverse embedded values of union (if any) and evaluate the + // polymorphic arguments + let old_num_exprs = self.expression_buffer.len(); + self.expression_buffer.extend(&literal.values); + let new_num_exprs = self.expression_buffer.len(); + self.visit_literal_poly_args(ctx, id)?; + + for expr_idx in old_num_exprs..new_num_exprs { + let expr_id = self.expression_buffer[expr_idx]; + self.expr_parent = ExpressionParent::Expression(upcast_id, expr_idx as u32); + self.visit_expr(ctx, expr_id)?; + } + + self.expression_buffer.truncate(old_num_exprs); } } @@ -903,6 +1004,45 @@ impl Visitor2 for 
ValidityAndLinkerVisitor { TypeClass::Component } else { // Expect to find a function + // TODO: @tokenizer, remove this ambiguity when tokenizer is implemented. Hacked + // in here for now. + let (symbol, _) = ctx.symbols.resolve_namespaced_identifier( + ctx.module.root_id, &symbolic.identifier + ); + if let Some(symbol) = symbol { + if let Symbol::Definition((_, definition_id)) = symbol.symbol { + if let Some(defined_type) = ctx.types.get_base_definition(&definition_id) { + if defined_type.definition.type_class() == TypeClass::Union { + // Transmute into union literal and call the appropriate traverser + let call_expr = &ctx.heap[id]; + let old_position = call_expr.position.clone(); + let old_arguments = call_expr.arguments.clone(); + let old_identifier = match &call_expr.method { + Method::Symbolic(v) => v.identifier.clone(), + _ => unreachable!(), + }; + + let expr_id = id.upcast(); + let lit_id = LiteralExpressionId(expr_id); + ctx.heap[expr_id] = Expression::Literal(LiteralExpression{ + this: lit_id, + position: old_position, + value: Literal::Union(LiteralUnion{ + identifier: old_identifier, + values: old_arguments, + poly_args2: Vec::new(), + definition: None, + variant_idx: 0, + }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }); + + return self.visit_literal_expr(ctx, lit_id); + } + } + } + } TypeClass::Function }; @@ -1697,6 +1837,9 @@ impl ValidityAndLinkerVisitor { Literal::Enum(literal) => { literal.poly_args2.extend(&literal.identifier.poly_args); }, + Literal::Union(literal) => { + literal.poly_args2.extend(&literal.identifier.poly_args); + } _ => { debug_assert!(false, "called visit_literal_poly_args on a non-polymorphic literal"); unreachable!(); @@ -1732,7 +1875,16 @@ impl ValidityAndLinkerVisitor { defined_type, maybe_poly_args, literal.identifier.position ).as_parse_error(&ctx.heap, &ctx.module.source)?; - println!("DEBUG: poly args 2: {:?}", &literal.poly_args2); + (num_to_infer, &literal.poly_args2) + }, + 
Literal::Union(literal) => { + let defined_type = ctx.types.get_base_definition(literal.definition.as_ref().unwrap()) + .unwrap(); + let maybe_poly_args = literal.identifier.get_poly_args(); + let num_to_infer = match_polymorphic_args_to_vars( + defined_type, maybe_poly_args, literal.identifier.position + ).as_parse_error(&ctx.heap, &ctx.module.source)?; + (num_to_infer, &literal.poly_args2) } }; @@ -1756,6 +1908,7 @@ impl ValidityAndLinkerVisitor { let poly_args = match &mut ctx.heap[lit_id].value { Literal::Struct(literal) => &mut literal.poly_args2, Literal::Enum(literal) => &mut literal.poly_args2, + Literal::Union(literal) => &mut literal.poly_args2, _ => unreachable!(), }; poly_args.reserve(num_poly_args_to_infer); diff --git a/src/protocol/tests/parser_imports.rs b/src/protocol/tests/parser_imports.rs index bbcbe96e11f2c0dc989c38b046713f13fb0d8be2..52b113ab255654806943064cefea64731bd6ef0e 100644 --- a/src/protocol/tests/parser_imports.rs +++ b/src/protocol/tests/parser_imports.rs @@ -242,9 +242,4 @@ fn test_illegal_import_use() { .assert_msg_has(0, "Could not resolve this identifier") .assert_occurs_at(0, "mod2::Foo"); }); -} - -// TODO: Test incorrect imports: -// 1. importing a module -// 2. import something a module imports -// 3. import something that doesn't exist in a module \ No newline at end of file +} \ No newline at end of file