diff --git a/language_spec.md b/language_spec.md new file mode 100644 index 0000000000000000000000000000000000000000..3af00076806801b64133a5c8527509989206d86f --- /dev/null +++ b/language_spec.md @@ -0,0 +1,291 @@ +# Protocol Description Language + +## Introduction + +## Grammar + +Beginning with the basics from which we'll construct the grammar, various characters and special variations thereof: + +``` +SP = " " // space +HTAB = 0x09 // horizontal tab +VCHAR = 0x21-0x7E // visible ASCII character +VCHAR-ESCLESS = 0x20-0x5B | 0x5D-0x7E // visible ASCII character without "\" +WSP = SP | HTAB // whitespace +ALPHA = 0x41-0x5A | 0x61-0x7A // characters (lower and upper case) +DIGIT = 0x30-0x39 // digit +NEWLINE = (0x15 0x0A) | 0x0A // carriage return and line feed, or just line feed + +// Classic backslash escaping to produce particular ASCII charcters +ESCAPE_CHAR = "\" +ESCAPED_CHARS = + ESCAPE_CHAR ESCAPE_CHAR | + ESCAPE_CHAR "t" | + ESCAPE_CHAR "r" | + ESCAPE_CHAR "n" | + ESCAPE_CHAR "0" | + ESCAPE_CHAR "'" | + ESCAPE_CHAR """ +``` + +Which are composed into the following components of an input file that do not directly contribute towards the AST: + +``` +// asterisk followed by any ASCII char, excluding "/", or just any ASCII char without "*" +block-comment-contents = "*" (0x00-0x2E | 0x30-0x7E) | (0x20-0x29 | 0x2B-0x7E) +block-comment = "/*" block-comment-contents* "*/" +line-comment = "//" (WSP | VCHAR)* NEWLINE +comment = block-comment | line-comment +cw = (comment | WSP | NEWLINE)* +cwb = (comment | WSP | newline)+ +``` + +Where it should be noted that the `cw` rule allows for not encountering any of the indicated characters, while the `cwb` rule expects at least one instance. + +The following operators are defined: + +``` +binary-operator = "||" | "&&" | + "!=" | "==" | "<=" | ">=" | "<" | ">" | + "|" | "&" | "^" | "<<" | ">>" | + "+" | "-" | "*" | "/" | "%" +assign-operator = "=" | + "|=" | "&=" | "^=" | "<<=" | ">>=" | + "+=" | "-=" | "*=" | "/=" | "%=" +unary-operator = "++" | "--" | "+" | "-" | "~" | "!" +``` + +**QUESTION**: Do we include the pre/postfix "++" and "--" operators? They were introduced in C to reduce the amount of required characters. But is still necessary? + +And to define various constants in the language, we allow for the following: + +``` +// Various integer constants, binary, octal, decimal, or hexadecimal, with a +// utility underscore to enhance humans reading the characters. Allowing use to +// write something like 100_000_256 or 0xDEAD_BEEF +int-bin-char = "0" | "1" +int-bin-constant = "0b" int-bin-char (int-bin-char | "_")* // 0b0100_1110 +int-oct-char = "0"-"7" +int-oct-constant = "0o" int-oct-char (int-oct-char | "_")* // 0o777 +int-dec-constant = DIGIT (DIGIT | "_")* // +int-hex-char = DIGIT | "a"-"f" | "A"-"F" +int-hex-constant = "0x" int-hex-char (int-hex-char | "_")* // 0xFEFE_1337 +int-constant = int-bin-constant | int-oct-constant | int-dec-constant | int-hex-constant + +// Floating point numbers +// TODO: Maybe support exponential notation? Seems silly for a networking +// language, but might be useful? +float-constant = DIGIT* "." DIGIT+ + +// Character constants: a single character. Its element may be an escaped +// character or a VCHAR (excluding "'" and "\") +char-element = ESCAPED_CHARS | (0x20-0x26 | 0x28-0x5B | 0x5D-0x7E) +char-constant = "'" char-element "'" + +// Same thing for strings, but these may contain 0 or more characters +str-element = ESCAPED_CHARS | (0x20-0x21 | 0x23-0x5B | 0x5D-0x7E) +str-constant = """ str-element* """ +``` + +Note that the integer characters are forced, somewhat arbitrarily without hampering the programmer's expressiveness, to start with a valid digit. Only then may one introduce the `_` character. And non-rigorously speaking characters may not contain an unescaped `'`-character, and strings may not contain an unescaped `"`-character. + +We now introduce the various identifiers that exist within the language, we make a distinction between "any identifier" and "any identifier except for the builtin ones". Because we h + +``` +identifier-any = ALPHA | (ALPHA | DIGIT | "_")* +keyword = + "composite" | "primitive" | + type-primitive | "true" | "false" | "null" | + "struct" | "enum" | + "if" | "else" | + "while" | "break" | "continue" | "return" | + "synchronous" | "assert" | + "goto" | "skip" | "new" | "let" +builtin = "put" | "get" | "fires" | "create" | "sizeof" | "assert" +identifier = identifier-any WITHOUT (keyword | builtin) + +// Identifier with any number of prefixed namespaces +ns-identifier = (identifier "::")* identifier +``` + +We then start introducing the type system. Learning from the "mistake" of C/C++ of having types like `byte` and `short` with unspecified and compiler-dependent byte-sizes (followed by everyone using `stdint.h`), we use the Rust/Zig-like `u8`, `i16`, etc. Currently we will limit the programmer to not produce integers which take up more than 64 bits. Furthermore, as one is writing network code, it would be quite neat to be able to put non-byte-aligned integers into a struct in order to directly access meaningful bits. Hence, with restrictions introduced later, we will allow for types like `i4` or `u1`. When actually retrieving them or performing computations with them we will use the next-largest byte-size to operate on them in "registers". + +**Question**: Difference between u1 and bool? Do we allow assignments between them? What about i1 and bool? + +As the language semantics are value-based, we are prevented from returning information from functions through its arguments. We may only return information through its (single) return value. If we consider the common case of having to parse a series of bytes into a meaningful struct, we cannot return both the struct and a value as a success indicator. For this reason, we introduce algebraic datatypes (or: tagged unions, or: enums) as well. + +Lastly, since functions are currently without internal side-effects (since functions cannot perform communication with components, and there is no functionality to interact "with the outside world" from within a function), it does not make sense to introduce the "void" type, as found in C/C++ to indicate that a function doesn't return anything of importance. However, internally we will allow for a "void" type, this will allow treating builtins such as "assert" and "put" like functions while constructing and evaluating the AST. + +``` +// The digits 1-64, without any leading zeros allowed, to allow specifying the +// signed and unsigned integer types +number-1-64 = NZ-DIGIT | (0x31-0x35 DIGIT) | ("6" 0x30-0x34) +type-signed-int = "i" number-1-64 // i1 through i64 +type-unsigned-int = "u" number-1-64 // u1 through u64 + +// Standard floats and bools +type-float = "f32" | "f64" +type-bool = "bool" + +// Messages, may be removed later +type-msg = "msg" + +// Indicators of port types +type-port = "in" | "out" + +// Unions and tagged unions, so we allow: +// enum SpecialBool { True, False } +// enum SpecialBool{True,False,} +// enum Tagged{Boolean(bool),SignedInt(i64),UnsignedInt(u64),Nothing} +type-union-element = identifier cw (("(" cw type cw ")") | ("=" cw int-constant))? +type-union-def = "enum" cwb identifier cw "{" cw type-union-element (cw "," cw type-union-element)* (cw ",")? cw "}" + +// Structs, so we allow: +// struct { u8 type, u2 flag0, u6 reserved } +type-struct-element = type cwb identifier +type-struct-def = "struct" cwb identifier cw "{" cw type-struct-element (cw "," cw type-struct-element)* (cw ",")? cw "}" + +type-primitive = type-signed-int | + type-unsigned-int | + type-float | + type-bool | + type-msg | + type-port + +// A type may be a user-defined type (e.g. "struct Bla"), a namespaced +// user type (e.g. "Module::Bla"), or a non-namespaced primitive type. We +// currently have no way (yet) to access nested modules, so we don't need to +// care about identifier nesting. +type = type-primitive | ns-identifier +``` + +With these types, we need to introduce some extra constant types. Ones that are used to construct struct instances and ones that are used to construct/assign enums. These are constructed as: + +``` +// Struct literals +struct-constant-element = identifier cw ":" cw expr +struct-constant = ns-identifier cw "{" cw struct-constant-element (cw "," struct-constant-element)* cw "}" + +enum-constant = ns-identifier "::" identifier cw "(" cw expr cw ")" +``` + +Finally, we declare methods and field accessors as: + +``` +method = builtin | ns-identifier + +field = "length" | identifier +``` + +**Question**: This requires some discussion. We allow for a "length" field on messages, and allow the definition of arrays. But if we wish to perform computation in a simple fashion, we need to allow for variable-length arrays of custom types. This requires builtin methods like "push", "pop", etc. But I suppose there is a much nicer way... In any case, this reminds me of programming in Fortran, which I definitely don't want to impose on other people (that, or I will force 72-character line lengths on them as well) + +When we parse a particular source file, we may expect the following "pragmas" to be sprinkled at the top of the source +file. They may exist at any position in the global scope of a source file. + +``` +// A domain identifier is a dot-separated sequence of identifiers. As these are +// only used to identify modules we allow any identifier to be used in them. +// The exception is the last identifier, which we, due to namespacing rules, +// force to be a non-reserved identifier. +domain-identifier = (identifier-any ".")* identifier + +pragma-version = "#version" cwb int-constant cw ";" // e.g. #version 500 +pragma-module = "#module" cwb domain-identifier cw ";" // e.g. #module hello.there + +// Import, e.g. +// #import module.submodule // access through submodule::function(), or submodule::Type +// #import module.submodule as Sub // access through Sub::function(), or Sub::type +// #import module.submodule::* // access through function(), or Type +// #import module.submodule::{function} // access through function() +// #import module.submodule::{function as func, type} // access through func() or type + +pragma-import-alias = cwb "as" cwb identifier +pragma-import-all = "::*" +pragma-import-single-symbol = "::" identifier pragma-import-alias? +pragma-import-multi-symbol = "::{" ... + cw identifier pragma-import-alias? ... + (cw "," cw identifier pragma-import-alias?)* ... + (cw ",")? cw "}" +pragma-import = "#import" cwb domain-identifier ... + (pragma-import-alias | pragma-import-all | pragma-import-single-symbol | pragma-import-multi-symbol)? + +// Custom pragmas for people which may be using (sometime, somewhere) +// metaprogramming with pragmas +pragma-custom = "#" identifier-any (cwb VCHAR (VCHAR | WS)*) cw ";" + +// Finally, a pragma may be any of the ones above +pragma = pragma-version | pragma-module | pragma-import | pragma-custom +``` + +Note that, different from C-like languages, we do require semicolons to exist at the end of a pragma statement. The reason is to prevent future hacks using the "\" character to indicate an end-of-line-but-not-really-end-of-line statements. + +Apart from these pragmas, we can have component definitions, type definitions and function definitions within the source file. The grammar for these may be formulated as: + +``` +// Annotated types and function/component arguments +type-annotation = type (cw [])? +var-declaration = type-annotation cwb identifier +params-list = "(" cw (var-declaration (cw "," cw var-declaration)*)? cw ")" + +// Functions and components +function-def = type-annotation cwb identifier cw params-list cw block +composite-def = "composite" cwb identifier cw params-list cw block +primitive-def = "primitive" cwb identifier cw params-list cw block +component-def = composite-def | primitive-def + +// Symbol definitions now become +symbol-def = type-union-def | type-struct-def | function-def | component-def +``` + +Using these rules, we can now describe the grammar of a single file as: + +``` +file = cw (pragma | symbol-def)* cw +``` + +Of course, we currently cannot do anything useful with our grammar, hence we have to describe blocks to let the functions and component definitions do something. To do so, we proceed as: + +``` +// channel a->b;, or channel a -> b; +channel-decl = channel cwb identifier cw "->" cw identifier cw ";" +// int a = 5, b = 2 + 3; +memory-decl = var-declaration cw "=" cw expression (cw "," cw identifier cw "=" cw expression)* cw ";" + +stmt = block | + identifier cw ":" cw stmt | // label + "if" cw pexpr cw stmt (cw "else" cwb stmt)? | + "while" cw pexpr cw stmt | + "break" (cwb identifier)? cw ";" | + "continue" (cwb identifier)? cw ";" | + "synchronous" stmt | + "return" cwb identifier cw ";" | + "goto" cwb identifier cw ";" | + "skip" cw ";" | + "new" cwb method-expr cw ";" | + expr cw ";" + +// TODO: Add all the other expressions +// TODO: Also: add struct construction and enum construction +method-params-list = "(" cw (expr (cw "," cw expr)* )? cw ")" +method-expr = method cw method-params-list + +enum-destructure-expr = "let" cw ns-identifier "::" identifier cw "(" cw identifier cw ")" cw "=" expr +enum-test-expr = ns-identifier "::" identifier cw "==" cw expr + +block = "{" (cw (channel-decl | memory-decl | stmt))* cw "}" +``` + +Note that we have a potential collision of various expressions/statements. The following cases are of importance: + +1. An empty block is written as `{}`, while an empty array construction is also written as `{}`. +2. Both function calls as enum constants feature the same construction syntax. That is: `foo::bar(expression)` may refer to a function call to `bar` in the namespace `foo`, but may also be the construction of enum `foo`'s `bar` variant (containing a value `expression`). These may be disambiguated using the type system. +3. The enumeration destructuring expression may collide with the constant enumeration literal. These may be disambiguated by looking at the inner value. If the inner value is an identifier and not yet defined as a variable, then it is a destructuring expression. Otherwise it must be interpreted as a constant enumeration. The enumeration destructuring expression must then be completed by it being a child of an binary equality operator. If not, then it is invalid syntax. + +Finally, for consistency, there are additional rules to the enumeration destructuring. As a preamble: the language should allow programmers to express any kind of trickery they want, as long as it is correct. But programmers should be prevented from expressing something that is by definition incorrect/illogical. So enumeration destructuring (e.g. `Enum::Variant(bla) == expression`) should return a value with a special type (e.g. `EnumDestructureBool`) that may only reside within the testing expressions of `if` and `while` statements. Furthermore, this special boolean type only supports the logical-and (`&&`) operator. This way we prevent invalid expressions such as `if (Enum::Variant1(foo) == expr || Enum::Variant2(bar) == expr) { ... }`, but we do allow potentially valid expressions like `if (Enum::Variant1(foo) == expr_foo && Enum::Variant2(bar) == expr_bar) { ... }`. + +**Question**: In the documentation for V1.0 we find the `synchronous cw (params-list cw stmt | block)` rule. Why the `params-list`? + +**TODO**: Release constructions on memory declarations: as long as we have a write to it before a read we should be fine. Can be done once we add semantic analysis in order to optimize putting and getting port values. +**TODO**: Implement type inference, should be simpler once I figure out how to write a typechecker. +**TODO**: Add constants assigned in the global scope. +**TODO**: Add a runtime expression evaluator (probably before constants in global scope) to simplify expressions and/or remove impossible branches. \ No newline at end of file diff --git a/src/protocol/arena.rs b/src/protocol/arena.rs index ad5356c82fc4dcb769f1205b58a61bb81ef69c16..2120f5d9b983c1af93ba6530e5a6565e15d101eb 100644 --- a/src/protocol/arena.rs +++ b/src/protocol/arena.rs @@ -4,7 +4,7 @@ use core::marker::PhantomData; #[derive(serde::Serialize, serde::Deserialize)] pub struct Id { - index: u32, + pub(crate) index: u32, _phantom: PhantomData, } #[derive(Debug, serde::Serialize, serde::Deserialize)] @@ -35,6 +35,7 @@ impl Hash for Id { self.index.hash(h); } } + impl Arena { pub fn new() -> Self { Self { store: vec![] } @@ -48,8 +49,8 @@ impl Arena { self.store.push(f(id)); id } - pub fn iter(&self) -> impl Iterator, &T)> { - (0..).map(|index| Id { index, _phantom: Default::default() }).zip(self.store.iter()) + pub fn iter(&self) -> impl Iterator { + self.store.iter() } } impl core::ops::Index> for Arena { @@ -62,4 +63,4 @@ impl core::ops::IndexMut> for Arena { fn index_mut(&mut self, id: Id) -> &mut Self::Output { self.store.index_mut(id.index as usize) } -} +} \ No newline at end of file diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index e28067b5849b6d42322ef7524386c96b9a5aff3e..f7a570318056ec6c02182e5188fc14bb97974fda 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -3,11 +3,12 @@ use std::fmt::{Debug, Display, Formatter}; use std::ops::{Index, IndexMut}; use super::arena::{Arena, Id}; +// use super::containers::StringAllocator; use crate::protocol::inputsource::*; -#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] -pub struct RootId(Id); +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct RootId(pub(crate) Id); #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct PragmaId(Id); @@ -15,27 +16,6 @@ pub struct PragmaId(Id); #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct ImportId(Id); -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] -pub struct IdentifierId(Id); - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] -pub struct SourceIdentifierId(IdentifierId); - -impl SourceIdentifierId { - pub fn upcast(self) -> IdentifierId { - self.0 - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] -pub struct ExternalIdentifierId(IdentifierId); - -impl ExternalIdentifierId { - pub fn upcast(self) -> IdentifierId { - self.0 - } -} - #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct TypeAnnotationId(Id); @@ -63,6 +43,24 @@ impl LocalId { #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct DefinitionId(Id); +#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] +pub struct StructId(DefinitionId); + +impl StructId { + pub fn upcast(self) -> DefinitionId{ + self.0 + } +} + +#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] +pub struct EnumId(DefinitionId); + +impl EnumId { + pub fn upcast(self) -> DefinitionId{ + self.0 + } +} + #[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)] pub struct ComponentId(DefinitionId); @@ -407,14 +405,18 @@ impl ImportedDeclarationId { #[derive(Debug, serde::Serialize, serde::Deserialize)] pub struct Heap { - // Phase 0: allocation + // Allocators + // #[serde[skip]] string_alloc: StringAllocator, + // Root arena, contains the entry point for different modules. Each root + // contains lists of IDs that correspond to the other arenas. protocol_descriptions: Arena, + // Contents of a file, these are the elements the `Root` elements refer to pragmas: Arena, - imports: Arena, + pub(crate) imports: Arena, identifiers: Arena, type_annotations: Arena, variables: Arena, - definitions: Arena, + pub(crate) definitions: Arena, statements: Arena, expressions: Arena, declarations: Arena, @@ -423,6 +425,7 @@ pub struct Heap { impl Heap { pub fn new() -> Heap { Heap { + // string_alloc: StringAllocator::new(), protocol_descriptions: Arena::new(), pragmas: Arena::new(), imports: Arena::new(), @@ -435,25 +438,6 @@ impl Heap { declarations: Arena::new(), } } - pub fn alloc_source_identifier( - &mut self, - f: impl FnOnce(SourceIdentifierId) -> SourceIdentifier, - ) -> SourceIdentifierId { - SourceIdentifierId(IdentifierId( - self.identifiers - .alloc_with_id(|id| Identifier::Source(f(SourceIdentifierId(IdentifierId(id))))), - )) - } - pub fn alloc_external_identifier( - &mut self, - f: impl FnOnce(ExternalIdentifierId) -> ExternalIdentifier, - ) -> ExternalIdentifierId { - ExternalIdentifierId(IdentifierId( - self.identifiers.alloc_with_id(|id| { - Identifier::External(f(ExternalIdentifierId(IdentifierId(id)))) - }), - )) - } pub fn alloc_type_annotation( &mut self, f: impl FnOnce(TypeAnnotationId) -> TypeAnnotation, @@ -739,6 +723,16 @@ impl Heap { }), )) } + pub fn alloc_struct_definition(&mut self, f: impl FnOnce(StructId) -> StructDefinition) -> StructId { + StructId(DefinitionId(self.definitions.alloc_with_id(|id| { + Definition::Struct(f(StructId(DefinitionId(id)))) + }))) + } + pub fn alloc_enum_definition(&mut self, f: impl FnOnce(EnumId) -> EnumDefinition) -> EnumId { + EnumId(DefinitionId(self.definitions.alloc_with_id(|id| { + Definition::Enum(f(EnumId(DefinitionId(id)))) + }))) + } pub fn alloc_composite(&mut self, f: impl FnOnce(CompositeId) -> Composite) -> CompositeId { CompositeId(ComponentId(DefinitionId(self.definitions.alloc_with_id(|id| { Definition::Component(Component::Composite(f(CompositeId(ComponentId(DefinitionId( @@ -786,16 +780,6 @@ impl Heap { }), )) } - - pub fn get_external_identifier(&mut self, ident: &[u8]) -> ExternalIdentifierId { - for (_, id) in self.identifiers.iter() { - if id.is_external() && id.ident() == ident { - return id.as_external().this; - } - } - // Not found - self.alloc_external_identifier(|this| ExternalIdentifier { this, value: ident.to_vec() }) - } } impl Index for Heap { @@ -825,24 +809,9 @@ impl Index for Heap { } } -impl Index for Heap { - type Output = Identifier; - fn index(&self, index: IdentifierId) -> &Self::Output { - &self.identifiers[index.0] - } -} - -impl Index for Heap { - type Output = SourceIdentifier; - fn index(&self, index: SourceIdentifierId) -> &Self::Output { - &self.identifiers[(index.0).0].as_source() - } -} - -impl Index for Heap { - type Output = ExternalIdentifier; - fn index(&self, index: ExternalIdentifierId) -> &Self::Output { - &self.identifiers[(index.0).0].as_external() +impl IndexMut for Heap { + fn index_mut(&mut self, index: ImportId) -> &mut Self::Output { + &mut self.imports[index.0] } } @@ -1213,26 +1182,19 @@ pub struct Root { } impl Root { - pub fn get_definition(&self, h: &Heap, id: IdentifierId) -> Option { - for &def in self.definitions.iter() { - if h[h[def].identifier()] == h[id] { - return Some(def); - } - } - None - } pub fn get_definition_ident(&self, h: &Heap, id: &[u8]) -> Option { for &def in self.definitions.iter() { - if h[h[def].identifier()].ident() == id { + if h[def].identifier().value == id { return Some(def); } } None } - pub fn get_declaration(&self, h: &Heap, id: IdentifierId) -> Option { - for &decl in self.declarations.iter() { - if h[h[decl].identifier()] == h[id] { - return Some(decl); + pub fn get_declaration(&self, h: &Heap, id: &Identifier) -> Option { + for declaration_id in self.declarations.iter() { + let declaration = &h[*declaration_id]; + if declaration.identifier().value == id.value { + return Some(*declaration_id); } } None @@ -1282,127 +1244,136 @@ impl SyntaxElement for PragmaOld { } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct Import { - pub this: ImportId, - // Phase 1: parser - pub position: InputPosition, - pub value: Vec, -} - -impl SyntaxElement for Import { - fn position(&self) -> InputPosition { - self.position - } -} - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Identifier { - External(ExternalIdentifier), - Source(SourceIdentifier), +pub enum Import { + Module(ImportModule), + Symbols(ImportSymbols) } -impl Identifier { - pub fn as_source(&self) -> &SourceIdentifier { +impl Import { + pub(crate) fn as_module(&self) -> &ImportModule { match self { - Identifier::Source(result) => result, - _ => panic!("Unable to cast `Identifier` to `SourceIdentifier`"), + Import::Module(m) => m, + _ => panic!("Unable to cast 'Import' to 'ImportModule'") } } - pub fn is_external(&self) -> bool { + pub(crate) fn as_symbols(&self) -> &ImportSymbols { match self { - Identifier::External(_) => true, - _ => false, - } - } - pub fn as_external(&self) -> &ExternalIdentifier { - match self { - Identifier::External(result) => result, - _ => panic!("Unable to cast `Identifier` to `ExternalIdentifier`"), - } - } - fn ident(&self) -> &[u8] { - match self { - Identifier::External(eid) => eid.ident(), - Identifier::Source(sid) => sid.ident(), + Import::Symbols(m) => m, + _ => panic!("Unable to cast 'Import' to 'ImportSymbols'") } } } -impl Display for Identifier { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // A source identifier is in ASCII range. - write!(f, "{}", String::from_utf8_lossy(self.ident())) - } -} - -impl PartialEq for Identifier { - fn eq(&self, rhs: &Identifier) -> bool { - self.ident() == rhs.ident() +impl SyntaxElement for Import { + fn position(&self) -> InputPosition { + match self { + Import::Module(m) => m.position, + Import::Symbols(m) => m.position + } } } -impl PartialEq for Identifier { - fn eq(&self, rhs: &SourceIdentifier) -> bool { - self.ident() == rhs.ident() - } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct ImportModule { + pub this: ImportId, + // Phase 1: parser + pub position: InputPosition, + pub module_name: Vec, + pub alias: Vec, + // Phase 2: module resolving + pub module_id: Option, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct ExternalIdentifier { - pub this: ExternalIdentifierId, +pub struct AliasedSymbol { // Phase 1: parser - pub value: Vec, + pub position: InputPosition, + pub name: Vec, + pub alias: Vec, + // Phase 2: symbol resolving + pub definition_id: Option, } -impl ExternalIdentifier { - fn ident(&self) -> &[u8] { - &self.value - } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct ImportSymbols { + pub this: ImportId, + // Phase 1: parser + pub position: InputPosition, + pub module_name: Vec, + // Phase 2: module resolving + pub module_id: Option, + // Phase 1&2 + // if symbols is empty, then we implicitly import all symbols without any + // aliases for them. If it is not empty, then symbols are explicitly + // specified, and optionally given an alias. + pub symbols: Vec, } -impl Display for ExternalIdentifier { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // A source identifier is in ASCII range. - write!(f, "{}", String::from_utf8_lossy(&self.value)) - } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Identifier { + pub position: InputPosition, + pub value: Vec } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct SourceIdentifier { - pub this: SourceIdentifierId, - // Phase 1: parser +pub struct NamespacedIdentifier { pub position: InputPosition, + pub num_namespaces: u8, pub value: Vec, } -impl SourceIdentifier { - pub fn ident(&self) -> &[u8] { - &self.value +impl NamespacedIdentifier { + fn iter(&self) -> NamespacedIdentifierIter { + NamespacedIdentifierIter{ + value: &self.value, + cur_offset: 0, + num_returned: 0, + num_total: self.num_namespaces + } } } -impl SyntaxElement for SourceIdentifier { - fn position(&self) -> InputPosition { - self.position - } +struct NamespacedIdentifierIter<'a> { + value: &'a Vec, + cur_offset: usize, + num_returned: u8, + num_total: u8, } -impl Display for SourceIdentifier { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - // A source identifier is in ASCII range. - write!(f, "{}", String::from_utf8_lossy(&self.value)) - } -} +impl<'a> Iterator for NamespacedIdentifierIter<'a> { + type Item = &'a [u8]; + fn next(&mut self) -> Option { + if self.cur_offset >= self.value.len() { + debug_assert_eq!(self.num_returned, self.num_total); + None + } else { + debug_assert!(self.num_returned < self.num_total); + let start = self.cur_offset; + let mut end = start; + while end < self.value.len() - 1 { + if self.value[end] == b':' && self.value[end + 1] == b':' { + self.cur_offset = end + 2; + self.num_returned += 1; + return Some(&self.value[start..end]); + } + end += 1; + } -impl PartialEq for SourceIdentifier { - fn eq(&self, rhs: &Identifier) -> bool { - self.ident() == rhs.ident() + // If NamespacedIdentifier is constructed properly, then we cannot + // end with "::" in the value, so + debug_assert!(end == 0 || (self.value[end - 1] != b':' && self.value[end] != b':')); + debug_assert_eq!(self.num_returned + 1, self.num_total); + self.cur_offset = self.value.len(); + self.num_returned += 1; + return Some(&self.value[start..]); + } } } -impl PartialEq for SourceIdentifier { - fn eq(&self, rhs: &SourceIdentifier) -> bool { - self.ident() == rhs.ident() +impl Display for Identifier { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + // A source identifier is in ASCII range. + write!(f, "{}", String::from_utf8_lossy(&self.value)) } } @@ -1519,13 +1490,13 @@ pub enum Method { Get, Fires, Create, - Symbolic(SourceIdentifierId), + Symbolic(Identifier), } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Field { Length, - Symbolic(SourceIdentifierId), + Symbolic(Identifier), } impl Field { pub fn is_length(&self) -> bool { @@ -1554,7 +1525,7 @@ impl Scope { pub trait VariableScope { fn parent_scope(&self, h: &Heap) -> Option; - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option; + fn get_variable(&self, h: &Heap, id: &Identifier) -> Option; } impl VariableScope for Scope { @@ -1565,7 +1536,7 @@ impl VariableScope for Scope { Scope::Synchronous(stmt) => h[*stmt].parent_scope(h), } } - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option { + fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { match self { Scope::Definition(def) => h[*def].get_variable(h, id), Scope::Block(stmt) => h[*stmt].get_variable(h, id), @@ -1581,10 +1552,10 @@ pub enum Variable { } impl Variable { - pub fn identifier(&self) -> SourceIdentifierId { + pub fn identifier(&self) -> &Identifier { match self { - Variable::Parameter(var) => var.identifier, - Variable::Local(var) => var.identifier, + Variable::Parameter(var) => &var.identifier, + Variable::Local(var) => &var.identifier, } } pub fn is_parameter(&self) -> bool { @@ -1628,7 +1599,7 @@ pub struct Parameter { // Phase 1: parser pub position: InputPosition, pub type_annotation: TypeAnnotationId, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, } impl SyntaxElement for Parameter { @@ -1643,7 +1614,7 @@ pub struct Local { // Phase 1: parser pub position: InputPosition, pub type_annotation: TypeAnnotationId, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, } impl SyntaxElement for Local { fn position(&self) -> InputPosition { @@ -1653,11 +1624,37 @@ impl SyntaxElement for Local { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Definition { + Struct(StructDefinition), + Enum(EnumDefinition), Component(Component), Function(Function), } impl Definition { + pub fn is_struct(&self) -> bool { + match self { + Definition::Struct(_) => true, + _ => false + } + } + pub fn as_struct(&self) -> &StructDefinition { + match self { + Definition::Struct(result) => result, + _ => panic!("Unable to cast 'Definition' to 'StructDefinition'"), + } + } + pub fn is_enum(&self) -> bool { + match self { + Definition::Enum(_) => true, + _ => false, + } + } + pub fn as_enum(&self) -> &EnumDefinition { + match self { + Definition::Enum(result) => result, + _ => panic!("Unable to cast 'Definition' to 'EnumDefinition'"), + } + } pub fn is_component(&self) -> bool { match self { Definition::Component(_) => true, @@ -1682,22 +1679,29 @@ impl Definition { pub fn as_primitive(&self) -> &Primitive { self.as_component().as_primitive() } - pub fn identifier(&self) -> SourceIdentifierId { + pub fn identifier(&self) -> &Identifier { match self { + Definition::Struct(def) => &def.identifier, + Definition::Enum(def) => &def.identifier, Definition::Component(com) => com.identifier(), - Definition::Function(fun) => fun.identifier, + Definition::Function(fun) => &fun.identifier, } } pub fn parameters(&self) -> &Vec { + // TODO: Fix this + static EMPTY_VEC: Vec = Vec::new(); match self { Definition::Component(com) => com.parameters(), Definition::Function(fun) => &fun.parameters, + _ => &EMPTY_VEC, } } pub fn body(&self) -> StatementId { + // TODO: Fix this match self { Definition::Component(com) => com.body(), Definition::Function(fun) => fun.body, + _ => panic!("cannot retrieve body (for EnumDefinition or StructDefinition)") } } } @@ -1705,6 +1709,8 @@ impl Definition { impl SyntaxElement for Definition { fn position(&self) -> InputPosition { match self { + Definition::Struct(def) => def.position, + Definition::Enum(def) => def.position, Definition::Component(def) => def.position(), Definition::Function(def) => def.position(), } @@ -1715,16 +1721,56 @@ impl VariableScope for Definition { fn parent_scope(&self, _h: &Heap) -> Option { None } - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option { - for ¶m in self.parameters().iter() { - if h[h[param].identifier] == h[id] { - return Some(param.0); + fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { + for ¶meter_id in self.parameters().iter() { + let parameter = &h[parameter_id]; + if parameter.identifier.value == id.value { + return Some(parameter_id.0); } } None } } +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct StructFieldDefinition { + pub position: InputPosition, + pub field: Identifier, + pub the_type: TypeAnnotationId, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct StructDefinition { + pub this: StructId, + // Phase 1: parser + pub position: InputPosition, + pub identifier: Identifier, + pub fields: Vec +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)] +pub enum EnumVariantValue { + None, + Integer(i64), + Type(TypeAnnotationId), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct EnumVariantDefinition { + pub position: InputPosition, + pub identifier: Identifier, + pub value: EnumVariantValue, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct EnumDefinition { + pub this: EnumId, + // Phase 1: parser + pub position: InputPosition, + pub identifier: Identifier, + pub variants: Vec, +} + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub enum Component { Composite(Composite), @@ -1750,10 +1796,10 @@ impl Component { _ => panic!("Unable to cast `Component` to `Primitive`"), } } - fn identifier(&self) -> SourceIdentifierId { + fn identifier(&self) -> &Identifier { match self { - Component::Composite(com) => com.identifier, - Component::Primitive(prim) => prim.identifier, + Component::Composite(com) => &com.identifier, + Component::Primitive(prim) => &prim.identifier, } } pub fn parameters(&self) -> &Vec { @@ -1784,7 +1830,7 @@ pub struct Composite { pub this: CompositeId, // Phase 1: parser pub position: InputPosition, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, pub parameters: Vec, pub body: StatementId, } @@ -1800,7 +1846,7 @@ pub struct Primitive { pub this: PrimitiveId, // Phase 1: parser pub position: InputPosition, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, pub parameters: Vec, pub body: StatementId, } @@ -1817,7 +1863,7 @@ pub struct Function { // Phase 1: parser pub position: InputPosition, pub return_type: TypeAnnotationId, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, pub parameters: Vec, pub body: StatementId, } @@ -1841,7 +1887,7 @@ impl Declaration { Declaration::Imported(decl) => &decl.signature, } } - pub fn identifier(&self) -> IdentifierId { + pub fn identifier(&self) -> &Identifier { self.signature().identifier() } pub fn is_component(&self) -> bool { @@ -1882,16 +1928,18 @@ pub enum Signature { impl Signature { pub fn from_definition(h: &Heap, def: DefinitionId) -> Signature { + // TODO: Fix this match &h[def] { Definition::Component(com) => Signature::Component(ComponentSignature { - identifier: com.identifier().0, + identifier: com.identifier().clone(), // TODO: @fix arity: Signature::convert_parameters(h, com.parameters()), }), Definition::Function(fun) => Signature::Function(FunctionSignature { return_type: h[fun.return_type].the_type.clone(), - identifier: fun.identifier.0, + identifier: fun.identifier.clone(), // TODO: @fix arity: Signature::convert_parameters(h, &fun.parameters), }), + _ => panic!("cannot retrieve signature (for StructDefinition or EnumDefinition)") } } fn convert_parameters(h: &Heap, params: &Vec) -> Vec { @@ -1901,10 +1949,10 @@ impl Signature { } result } - fn identifier(&self) -> IdentifierId { + fn identifier(&self) -> &Identifier { match self { - Signature::Component(com) => com.identifier, - Signature::Function(fun) => fun.identifier, + Signature::Component(com) => &com.identifier, + Signature::Function(fun) => &fun.identifier, } } pub fn is_component(&self) -> bool { @@ -1923,14 +1971,14 @@ impl Signature { #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ComponentSignature { - pub identifier: IdentifierId, + pub identifier: Identifier, pub arity: Vec, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct FunctionSignature { pub return_type: Type, - pub identifier: IdentifierId, + pub identifier: Identifier, pub arity: Vec, } @@ -2222,10 +2270,11 @@ impl VariableScope for BlockStatement { fn parent_scope(&self, _h: &Heap) -> Option { self.parent_scope } - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option { - for &local in self.locals.iter() { - if h[h[local].identifier] == h[id] { - return Some(local.0); + fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { + for local_id in self.locals.iter() { + let local = &h[*local_id]; + if local.identifier.value == id.value { + return Some(local_id.0); } } None @@ -2328,7 +2377,7 @@ pub struct LabeledStatement { pub this: LabeledStatementId, // Phase 1: parser pub position: InputPosition, - pub label: SourceIdentifierId, + pub label: Identifier, pub body: StatementId, // Phase 2: linker pub in_sync: Option, @@ -2407,7 +2456,7 @@ pub struct BreakStatement { pub this: BreakStatementId, // Phase 1: parser pub position: InputPosition, - pub label: Option, + pub label: Option, // Phase 2: linker pub target: Option, } @@ -2423,7 +2472,7 @@ pub struct ContinueStatement { pub this: ContinueStatementId, // Phase 1: parser pub position: InputPosition, - pub label: Option, + pub label: Option, // Phase 2: linker pub target: Option, } @@ -2455,10 +2504,11 @@ impl VariableScope for SynchronousStatement { fn parent_scope(&self, _h: &Heap) -> Option { self.parent_scope } - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option { - for ¶m in self.parameters.iter() { - if h[h[param].identifier] == h[id] { - return Some(param.0); + fn get_variable(&self, h: &Heap, id: &Identifier) -> Option { + for parameter_id in self.parameters.iter() { + let parameter = &h[*parameter_id]; + if parameter.identifier.value == id.value { + return Some(parameter_id.0); } } None @@ -2514,7 +2564,7 @@ pub struct GotoStatement { pub this: GotoStatementId, // Phase 1: parser pub position: InputPosition, - pub label: SourceIdentifierId, + pub label: Identifier, // Phase 2: linker pub target: Option, } @@ -2897,7 +2947,7 @@ pub struct VariableExpression { pub this: VariableExpressionId, // Phase 1: parser pub position: InputPosition, - pub identifier: SourceIdentifierId, + pub identifier: Identifier, // Phase 2: linker pub declaration: Option, } diff --git a/src/protocol/containers.rs b/src/protocol/containers.rs new file mode 100644 index 0000000000000000000000000000000000000000..348bbaaca7c70d11bedb9fbbc65a26bfcbc5eed6 --- /dev/null +++ b/src/protocol/containers.rs @@ -0,0 +1,108 @@ +/// Containers.rs +/// +/// Contains specialized containers for the parser/compiler + +const PAGE_SIZE: usize = 4096; + +struct StringPage { + buffer: [u8; PAGE_SIZE], + remaining: usize, + next_page: Option>, +} + +impl StringPage { + fn new() -> Self{ + let res = Self{ + buffer: [0; PAGE_SIZE], + remaining: PAGE_SIZE, + next_page: None + }; + res + } +} + +/// Custom allocator for strings that are copied and remain valid during the +/// complete compilation phase. May perform multiple allocations but the +/// previously allocated strings remain valid. Because we will usually allocate +/// quite a number of these we will allocate a buffer upon construction of the +/// StringAllocator. +pub(crate) struct StringAllocator { + first_page: Box, + last_page: *mut StringPage, +} + +impl StringAllocator { + pub(crate) fn new() -> StringAllocator { + let mut page = Box::new(StringPage::new()); + let page_ptr = unsafe { page.as_mut() as *mut StringPage }; + StringAllocator{ + first_page: page, + last_page: page_ptr, + } + } + + pub(crate) fn alloc(&mut self, data: &[u8]) -> Result<&'static str, String> { + let data_len = data.len(); + if data_len > PAGE_SIZE { + return Err(format!( + "string is too large ({} bytes exceeds the maximum of {})", + data_len, PAGE_SIZE + )); + } + + // Because we're doing a copy anyway, we might as well perform the + // UTF-8 checking now. Such that it is safe to do an unchecked + // `from_utf8_unchecked` later. + let data = std::str::from_utf8(data); + if let Err(_) = data { + return Err(format!("invalid utf8-string")); + } + let data = data.unwrap(); + + unsafe { + if data_len > (*self.last_page).remaining { + // Allocate new page + let mut new_page = Box::new(StringPage::new()); + let new_page_ptr = new_page.as_mut() as *mut StringPage; + (*self.last_page).next_page = Some(new_page); + self.last_page = new_page_ptr; + } + + let remaining = (*self.last_page).remaining; + debug_assert!(data_len <= remaining); + let start = PAGE_SIZE - remaining; + (*self.last_page).buffer[start..start+data_len].copy_from_slice(data.as_bytes()); + (*self.last_page).remaining -= data_len; + + Ok(std::str::from_utf8_unchecked(&(*self.last_page).buffer[start..start+data_len])) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_alloc() { + // Make sure pointers are somewhat correct + let mut alloc = StringAllocator::new(); + assert!(alloc.first_page.next_page.is_none()); + assert_eq!(alloc.first_page.as_ref() as *const StringPage, alloc.last_page); + + let input = "I am a simple static string"; + let filler = " ".repeat(PAGE_SIZE - input.len()); + let ref_first = alloc.alloc(input.as_bytes()).expect("alloc first"); + let ref_filler = alloc.alloc(filler.as_bytes()).expect("alloc filler"); + let ref_second = alloc.alloc(input.as_bytes()).expect("alloc second"); + + assert!(alloc.first_page.next_page.is_some()); + assert!(alloc.first_page.next_page.as_ref().unwrap().next_page.is_none()); + let last_page_ptr = alloc.first_page.next_page.as_ref().unwrap().as_ref() as *const StringPage; + assert_eq!(last_page_ptr, alloc.last_page); + + assert_eq!(ref_first, input); + assert_eq!(ref_filler, filler); + assert_eq!(ref_second, input); + } +} \ No newline at end of file diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index 1067b0e63a5e237820d4d75b49ba84ad0ad92b70..8b0131de87bcb525ea2055ffae180d5de0e2513d 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -1432,11 +1432,11 @@ impl Store { fn get(&mut self, h: &Heap, ctx: &mut EvalContext, rexpr: ExpressionId) -> EvalResult { match &h[rexpr] { Expression::Variable(var) => { - let var = var.declaration.unwrap(); + let var_id = var.declaration.unwrap(); let value = self .map - .get(&var) - .expect(&format!("Uninitialized variable {:?}", h[h[var].identifier()])); + .get(&var_id) + .expect(&format!("Uninitialized variable {:?}", String::from_utf8_lossy(&var.identifier.value))); Ok(value.clone()) } Expression::Indexing(indexing) => { @@ -1569,7 +1569,7 @@ impl Store { todo!() } Expression::Constant(expr) => Ok(Value::from_constant(&expr.value)), - Expression::Call(expr) => match expr.method { + Expression::Call(expr) => match &expr.method { Method::Create => { assert_eq!(1, expr.arguments.len()); let length = self.eval(h, ctx, expr.arguments[0])?; diff --git a/src/protocol/inputsource.rs b/src/protocol/inputsource.rs index f16da444b81e6f53a397487ec0ce14a34a11d235..858fdf25cd964674da1e9b859a5a6216de1e2aba 100644 --- a/src/protocol/inputsource.rs +++ b/src/protocol/inputsource.rs @@ -7,8 +7,8 @@ use backtrace::Backtrace; #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct InputSource { - filename: String, - pub input: Vec, + pub(crate) filename: String, + pub(crate) input: Vec, line: usize, column: usize, offset: usize, @@ -87,9 +87,9 @@ impl InputSource { self.column = pos.column; self.offset = pos.offset; } - pub fn error(&self, message: S) -> ParseError { - self.pos().parse_error(message) - } + // pub fn error(&self, message: S) -> ParseError { + // self.pos().parse_error(message) + // } pub fn is_eof(&self) -> bool { self.next() == None } @@ -147,7 +147,7 @@ impl fmt::Display for InputSource { } } -#[derive(Debug, Clone, Copy, Default, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)] pub struct InputPosition { line: usize, column: usize, @@ -167,14 +167,20 @@ impl InputPosition { } &source.input[start..end] } - fn parse_error(&self, message: S) -> ParseError { - ParseError { position: *self, message: message.to_string(), backtrace: Backtrace::new() } - } + // fn parse_error(&self, message: S) -> ParseError { + // ParseError { position: *self, message: message.to_string(), backtrace: Backtrace::new() } + // } fn eval_error(&self, message: S) -> EvalError { EvalError { position: *self, message: message.to_string(), backtrace: Backtrace::new() } } } +impl Default for InputPosition { + fn default() -> Self { + Self{ line: 1, column: 1, offset: 0 } + } +} + impl fmt::Display for InputPosition { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}:{}", self.line, self.column) @@ -188,78 +194,132 @@ pub trait SyntaxElement { } } -#[derive(Debug, Clone)] -pub struct ParseError { +#[derive(Debug)] +pub enum ParseErrorType { + Info, + Error +} + +#[derive(Debug)] +pub struct ParseErrorStatement { + error_type: ParseErrorType, position: InputPosition, + filename: String, + context: String, message: String, - backtrace: Backtrace, } -impl ParseError { - pub fn new(position: InputPosition, message: S) -> ParseError { - ParseError { position, message: message.to_string(), backtrace: Backtrace::new() } +impl ParseErrorStatement { + fn from_source(error_type: ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { + // Seek line start and end + println!("DEBUG[1]:\nPos {}, msg: {},\nDEBUG[2]: In source:\n{}", + position, msg, String::from_utf8_lossy(&source.input)); + debug_assert!(position.column < position.offset); + let line_start = position.offset - (position.column - 1); + let mut line_end = position.offset; + while line_end < source.input.len() && source.input[line_end] != b'\n' { + line_end += 1; + } + + // Compensate for '\r\n' + if line_end > line_start && source.input[line_end - 1] == b'\r' { + line_end -= 1; + } + + Self{ + error_type, + position, + filename: source.filename.clone(), + context: String::from_utf8_lossy(&source.input[line_start..line_end]).to_string(), + message: msg.to_string() + } } - // Diagnostic methods - pub fn write(&self, source: &InputSource, writer: &mut A) -> io::Result<()> { - if !source.filename.is_empty() { - writeln!( - writer, - "Parse error at {}:{}: {}", - source.filename, self.position, self.message - )?; +} + +impl fmt::Display for ParseErrorStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Write message + match self.error_type { + ParseErrorType::Info => write!(f, " INFO: ")?, + ParseErrorType::Error => write!(f, "ERROR: ")?, + } + writeln!(f, "{}", &self.message); + + // Write originating file/line/column + if self.filename.is_empty() { + writeln!(f, " +- at {}:{}", self.position.line, self.position.column)?; } else { - writeln!(writer, "Parse error at {}: {}", self.position, self.message)?; + writeln!(f, " +- at {}:{}:{}", self.filename, self.position.line, self.position.column)?; } - let line = self.position.context(source); - writeln!(writer, "{}", String::from_utf8_lossy(line))?; - let mut arrow: Vec = Vec::new(); - for pos in 1..self.position.column { - let c = line[pos - 1]; - if c == b'\t' { - arrow.push(b'\t') + + // Write source context + writeln!(f, " | ")?; + writeln!(f, " | {}", self.context)?; + + // Write underline indicating where the error ocurred + debug_assert!(self.position.column <= self.context.chars().count()); + let mut arrow = String::with_capacity(self.context.len() + 3); + arrow.push_str(" | "); + let mut char_col = 1; + for char in self.context.chars() { + if char_col == self.position.column { break; } + if char == '\t' { + arrow.push('\t'); } else { - arrow.push(b' ') + arrow.push(' '); } + + char_col += 1; } - arrow.push(b'^'); - writeln!(writer, "{}", String::from_utf8_lossy(&arrow)) - } - pub fn print(&self, source: &InputSource) { - self.write(source, &mut std::io::stdout()).unwrap() - } - pub fn display<'a>(&'a self, source: &'a InputSource) -> DisplayParseError<'a> { - DisplayParseError::new(self, source) - } -} + arrow.push('^'); + writeln!(f, "{}", arrow)?; -impl From for io::Error { - fn from(_: ParseError) -> io::Error { - io::Error::new(io::ErrorKind::InvalidInput, "parse error") + Ok(()) } } -#[derive(Clone, Copy)] -pub struct DisplayParseError<'a> { - error: &'a ParseError, - source: &'a InputSource, +#[derive(Debug)] +pub struct ParseError2 { + statements: Vec } -impl DisplayParseError<'_> { - fn new<'a>(error: &'a ParseError, source: &'a InputSource) -> DisplayParseError<'a> { - DisplayParseError { error, source } +impl fmt::Display for ParseError2 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.statements.is_empty() { + return Ok(()) + } + + self.statements[0].fmt(f)?; + for statement in self.statements.iter().skip(1) { + writeln!(f)?; + statement.fmt(f)?; + } + + Ok(()) } } -impl fmt::Display for DisplayParseError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut vec: Vec = Vec::new(); - match self.error.write(self.source, &mut vec) { - Err(_) => { - return fmt::Result::Err(fmt::Error); - } - Ok(_) => {} - } - write!(f, "{}", String::from_utf8_lossy(&vec)) +impl ParseError2 { + pub fn empty() -> Self { + Self{ statements: Vec::new() } + } + + pub fn new_error(source: &InputSource, position: InputPosition, msg: &str) -> Self { + Self{ statements: vec!(ParseErrorStatement::from_source(ParseErrorType::Error, source, position, msg))} + } + + pub fn with_prefixed(mut self, error_type: ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { + self.statements.insert(0, ParseErrorStatement::from_source(error_type, source, position, msg)); + self + } + + pub fn with_postfixed(mut self, error_type: ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { + self.statements.push(ParseErrorStatement::from_source(error_type, source, position, msg)); + self + } + + pub fn with_postfixed_info(self, source: &InputSource, position: InputPosition, msg: &str) -> Self { + self.with_postfixed(ParseErrorType::Info, source, position, msg) } } diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 8a42cf1d6bf8e956dfb682c1d763836d60e88c1e..ced2c6a589f46bf43faa5c5e8c21fbbf4ab5de72 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -57,7 +57,6 @@ fn is_integer_rest(x: Option) -> bool { || c >= b'a' && c <= b'f' || c >= b'A' && c <= b'F' || c == b'x' - || c == b'X' || c == b'o' } else { false @@ -81,12 +80,15 @@ impl Lexer<'_> { pub fn new(source: &mut InputSource) -> Lexer { Lexer { source, level: 0 } } - fn consume_line(&mut self) -> Result, ParseError> { + fn error_at_pos(&self, msg: &str) -> ParseError2 { + ParseError2::new_error(self.source, self.source.pos(), msg) + } + fn consume_line(&mut self) -> Result, ParseError2> { let mut result: Vec = Vec::new(); let mut next = self.source.next(); while next.is_some() && next != Some(b'\n') && next != Some(b'\r') { if !(is_vchar(next) || is_wsp(next)) { - return Err(self.source.error("Expected visible character or whitespace")); + return Err(self.error_at_pos("Expected visible character or whitespace")); } result.push(next.unwrap()); self.source.consume(); @@ -100,7 +102,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_whitespace(&mut self, expected: bool) -> Result<(), ParseError> { + fn consume_whitespace(&mut self, expected: bool) -> Result<(), ParseError2> { let mut found = false; let mut next = self.source.next(); while next.is_some() { @@ -148,7 +150,7 @@ impl Lexer<'_> { break; } if expected && !found { - Err(self.source.error("Expected whitespace")) + Err(self.error_at_pos("Expected whitespace")) } else { Ok(()) } @@ -165,24 +167,19 @@ impl Lexer<'_> { true } } - fn consume_keyword(&mut self, keyword: &[u8]) -> Result<(), ParseError> { + fn consume_keyword(&mut self, keyword: &[u8]) -> Result<(), ParseError2> { let len = keyword.len(); for i in 0..len { let expected = Some(lowercase(keyword[i])); let next = self.source.next(); if next != expected { - return Err(self - .source - .error(format!("Expected keyword: {}", String::from_utf8_lossy(keyword)))); + return Err(self.error_at_pos(&format!("Expected keyword '{}'", String::from_utf8_lossy(keyword)))); } self.source.consume(); } if let Some(next) = self.source.next() { - if next >= b'A' && next <= b'Z' || next >= b'a' && next <= b'z' { - return Err(self.source.error(format!( - "Expected word boundary after keyword: {}", - String::from_utf8_lossy(keyword) - ))); + if next >= b'A' && next <= b'Z' || next >= b'a' && next <= b'z' || next >= b'0' && next <= b'9' { + return Err(self.error_at_pos(&format!("Expected word boundary after '{}'", String::from_utf8_lossy(keyword)))); } } Ok(()) @@ -190,23 +187,21 @@ impl Lexer<'_> { fn has_string(&self, string: &[u8]) -> bool { self.source.has(string) } - fn consume_string(&mut self, string: &[u8]) -> Result<(), ParseError> { + fn consume_string(&mut self, string: &[u8]) -> Result<(), ParseError2> { let len = string.len(); for i in 0..len { let expected = Some(string[i]); let next = self.source.next(); if next != expected { - return Err(self - .source - .error(format!("Expected {}", String::from_utf8_lossy(string)))); + return Err(self.error_at_pos(&format!("Expected {}", String::from_utf8_lossy(string)))); } self.source.consume(); } Ok(()) } - fn consume_ident(&mut self) -> Result, ParseError> { + fn consume_ident(&mut self) -> Result, ParseError2> { if !self.has_identifier() { - return Err(self.source.error("Expected identifier")); + return Err(self.error_at_pos("Expected identifier")); } let mut result = Vec::new(); let mut next = self.source.next(); @@ -223,7 +218,7 @@ impl Lexer<'_> { fn has_integer(&mut self) -> bool { is_integer_start(self.source.next()) } - fn consume_integer(&mut self) -> Result { + fn consume_integer(&mut self) -> Result { let position = self.source.pos(); let mut data = Vec::new(); let mut next = self.source.next(); @@ -256,7 +251,7 @@ impl Lexer<'_> { }; if let Err(_err) = parsed { - return Err(ParseError::new(position, "Invalid integer constant")); + return Err(ParseError2::new_error(&self.source, position, "Invalid integer constant")); } Ok(parsed.unwrap()) @@ -305,18 +300,17 @@ impl Lexer<'_> { let next = self.source.next(); is_ident_start(next) } - fn consume_identifier(&mut self, h: &mut Heap) -> Result { + fn consume_identifier(&mut self, h: &mut Heap) -> Result { if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { - return Err(self.source.error("Expected identifier")); + return Err(self.error_at_pos("Expected identifier")); } let position = self.source.pos(); let value = self.consume_ident()?; - let id = h.alloc_source_identifier(|this| SourceIdentifier { this, position, value }); - Ok(id) + Ok(Identifier{ position, value }) } - fn consume_identifier_spilled(&mut self) -> Result<(), ParseError> { + fn consume_identifier_spilled(&mut self) -> Result<(), ParseError2> { if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { - return Err(self.source.error("Expected identifier")); + return Err(self.error_at_pos("Expected identifier")); } self.consume_ident()?; Ok(()) @@ -324,7 +318,7 @@ impl Lexer<'_> { // Types and type annotations - fn consume_primitive_type(&mut self) -> Result { + fn consume_primitive_type(&mut self) -> Result { if self.has_keyword(b"in") { self.consume_keyword(b"in")?; Ok(PrimitiveType::Input) @@ -364,7 +358,7 @@ impl Lexer<'_> { self.source.seek(backup_pos); return result; } - fn consume_type(&mut self) -> Result { + fn consume_type(&mut self) -> Result { let primitive = self.consume_primitive_type()?; let array; if self.has_array() { @@ -375,32 +369,32 @@ impl Lexer<'_> { } Ok(Type { primitive, array }) } - fn create_type_annotation_input(&self, h: &mut Heap) -> Result { + fn create_type_annotation_input(&self, h: &mut Heap) -> Result { let position = self.source.pos(); let the_type = Type::INPUT; let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); Ok(id) } - fn create_type_annotation_output(&self, h: &mut Heap) -> Result { + fn create_type_annotation_output(&self, h: &mut Heap) -> Result { let position = self.source.pos(); let the_type = Type::OUTPUT; let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); Ok(id) } - fn consume_type_annotation(&mut self, h: &mut Heap) -> Result { + fn consume_type_annotation(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let the_type = self.consume_type()?; let id = h.alloc_type_annotation(|this| TypeAnnotation { this, position, the_type }); Ok(id) } - fn consume_type_annotation_spilled(&mut self) -> Result<(), ParseError> { + fn consume_type_annotation_spilled(&mut self) -> Result<(), ParseError2> { self.consume_type()?; Ok(()) } // Parameters - fn consume_parameter(&mut self, h: &mut Heap) -> Result { + fn consume_parameter(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let type_annotation = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; @@ -413,7 +407,7 @@ impl Lexer<'_> { &mut self, h: &mut Heap, params: &mut Vec, - ) -> Result<(), ParseError> { + ) -> Result<(), ParseError2> { self.consume_string(b"(")?; self.consume_whitespace(false)?; if !self.has_string(b")") { @@ -427,14 +421,16 @@ impl Lexer<'_> { self.consume_whitespace(false)?; } } - self.consume_string(b")") + self.consume_string(b")")?; + + Ok(()) } // ==================== // Expressions // ==================== - fn consume_paren_expression(&mut self, h: &mut Heap) -> Result { + fn consume_paren_expression(&mut self, h: &mut Heap) -> Result { self.consume_string(b"(")?; self.consume_whitespace(false)?; let result = self.consume_expression(h)?; @@ -442,16 +438,16 @@ impl Lexer<'_> { self.consume_string(b")")?; Ok(result) } - fn consume_expression(&mut self, h: &mut Heap) -> Result { + fn consume_expression(&mut self, h: &mut Heap) -> Result { if self.level >= MAX_LEVEL { - return Err(self.source.error("Too deeply nested expression")); + return Err(self.error_at_pos("Too deeply nested expression")); } self.level += 1; let result = self.consume_assignment_expression(h); self.level -= 1; result } - fn consume_assignment_expression(&mut self, h: &mut Heap) -> Result { + fn consume_assignment_expression(&mut self, h: &mut Heap) -> Result { let result = self.consume_conditional_expression(h)?; self.consume_whitespace(false)?; if self.has_assignment_operator() { @@ -485,7 +481,7 @@ impl Lexer<'_> { || self.has_string(b"^=") || self.has_string(b"|=") } - fn consume_assignment_operator(&mut self) -> Result { + fn consume_assignment_operator(&mut self) -> Result { if self.has_string(b"=") { self.consume_string(b"=")?; Ok(AssignmentOperator::Set) @@ -520,10 +516,10 @@ impl Lexer<'_> { self.consume_string(b"|=")?; Ok(AssignmentOperator::BitwiseOred) } else { - Err(self.source.error("Expected assignment operator")) + Err(self.error_at_pos("Expected assignment operator")) } } - fn consume_conditional_expression(&mut self, h: &mut Heap) -> Result { + fn consume_conditional_expression(&mut self, h: &mut Heap) -> Result { let result = self.consume_concat_expression(h)?; self.consume_whitespace(false)?; if self.has_string(b"?") { @@ -548,7 +544,7 @@ impl Lexer<'_> { Ok(result) } } - fn consume_concat_expression(&mut self, h: &mut Heap) -> Result { + fn consume_concat_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_lor_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"@") { @@ -571,7 +567,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_lor_expression(&mut self, h: &mut Heap) -> Result { + fn consume_lor_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_land_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"||") { @@ -594,7 +590,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_land_expression(&mut self, h: &mut Heap) -> Result { + fn consume_land_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_bor_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"&&") { @@ -617,7 +613,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_bor_expression(&mut self, h: &mut Heap) -> Result { + fn consume_bor_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_xor_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"|") && !self.has_string(b"||") && !self.has_string(b"|=") { @@ -640,7 +636,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_xor_expression(&mut self, h: &mut Heap) -> Result { + fn consume_xor_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_band_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"^") && !self.has_string(b"^=") { @@ -663,7 +659,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_band_expression(&mut self, h: &mut Heap) -> Result { + fn consume_band_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_eq_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"&") && !self.has_string(b"&&") && !self.has_string(b"&=") { @@ -686,7 +682,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_eq_expression(&mut self, h: &mut Heap) -> Result { + fn consume_eq_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_rel_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"==") || self.has_string(b"!=") { @@ -715,7 +711,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_rel_expression(&mut self, h: &mut Heap) -> Result { + fn consume_rel_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_shift_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"<=") @@ -754,7 +750,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_shift_expression(&mut self, h: &mut Heap) -> Result { + fn consume_shift_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_add_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"<<") && !self.has_string(b"<<=") @@ -785,7 +781,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_add_expression(&mut self, h: &mut Heap) -> Result { + fn consume_add_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_mul_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"+") && !self.has_string(b"+=") @@ -816,7 +812,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_mul_expression(&mut self, h: &mut Heap) -> Result { + fn consume_mul_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_prefix_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"*") && !self.has_string(b"*=") @@ -851,7 +847,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_prefix_expression(&mut self, h: &mut Heap) -> Result { + fn consume_prefix_expression(&mut self, h: &mut Heap) -> Result { if self.has_string(b"+") || self.has_string(b"-") || self.has_string(b"~") @@ -884,7 +880,7 @@ impl Lexer<'_> { } self.consume_whitespace(false)?; if self.level >= MAX_LEVEL { - return Err(self.source.error("Too deeply nested expression")); + return Err(self.error_at_pos("Too deeply nested expression")); } self.level += 1; let result = self.consume_prefix_expression(h); @@ -901,7 +897,7 @@ impl Lexer<'_> { } self.consume_postfix_expression(h) } - fn consume_postfix_expression(&mut self, h: &mut Heap) -> Result { + fn consume_postfix_expression(&mut self, h: &mut Heap) -> Result { let mut result = self.consume_primary_expression(h)?; self.consume_whitespace(false)?; while self.has_string(b"++") @@ -997,7 +993,7 @@ impl Lexer<'_> { } Ok(result) } - fn consume_primary_expression(&mut self, h: &mut Heap) -> Result { + fn consume_primary_expression(&mut self, h: &mut Heap) -> Result { if self.has_string(b"(") { return self.consume_paren_expression(h); } @@ -1016,7 +1012,7 @@ impl Lexer<'_> { } Ok(self.consume_variable_expression(h)?.upcast()) } - fn consume_array_expression(&mut self, h: &mut Heap) -> Result { + fn consume_array_expression(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let mut elements = Vec::new(); self.consume_string(b"{")?; @@ -1041,7 +1037,7 @@ impl Lexer<'_> { fn consume_constant_expression( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); let value; if self.has_keyword(b"null") { @@ -1063,13 +1059,13 @@ impl Lexer<'_> { next = self.source.next(); } if next != Some(b'\'') || data.is_empty() { - return Err(self.source.error("Expected character constant")); + return Err(self.error_at_pos("Expected character constant")); } self.source.consume(); value = Constant::Character(data); } else { if !self.has_integer() { - return Err(self.source.error("Expected integer constant")); + return Err(self.error_at_pos("Expected integer constant")); } value = Constant::Integer(self.consume_integer()?); @@ -1097,7 +1093,7 @@ impl Lexer<'_> { self.source.seek(backup_pos); return result; } - fn consume_call_expression(&mut self, h: &mut Heap) -> Result { + fn consume_call_expression(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let method; if self.has_keyword(b"get") { @@ -1140,7 +1136,7 @@ impl Lexer<'_> { fn consume_variable_expression( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); let identifier = self.consume_identifier(h)?; Ok(h.alloc_variable_expression(|this| VariableExpression { @@ -1155,9 +1151,9 @@ impl Lexer<'_> { // Statements // ==================== - fn consume_statement(&mut self, h: &mut Heap) -> Result { + fn consume_statement(&mut self, h: &mut Heap) -> Result { if self.level >= MAX_LEVEL { - return Err(self.source.error("Too deeply nested statement")); + return Err(self.error_at_pos("Too deeply nested statement")); } self.level += 1; let result = self.consume_statement_impl(h); @@ -1182,7 +1178,7 @@ impl Lexer<'_> { self.source.seek(backup_pos); return result; } - fn consume_statement_impl(&mut self, h: &mut Heap) -> Result { + fn consume_statement_impl(&mut self, h: &mut Heap) -> Result { if self.has_string(b"{") { Ok(self.consume_block_statement(h)?) } else if self.has_keyword(b"skip") { @@ -1238,7 +1234,7 @@ impl Lexer<'_> { self.source.seek(backup_pos); return result; } - fn consume_block_statement(&mut self, h: &mut Heap) -> Result { + fn consume_block_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let mut statements = Vec::new(); self.consume_string(b"{")?; @@ -1266,7 +1262,7 @@ impl Lexer<'_> { .upcast()) } } - fn consume_local_statement(&mut self, h: &mut Heap) -> Result { + fn consume_local_statement(&mut self, h: &mut Heap) -> Result { if self.has_keyword(b"channel") { Ok(self.consume_channel_statement(h)?.upcast()) } else { @@ -1276,7 +1272,7 @@ impl Lexer<'_> { fn consume_channel_statement( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); self.consume_keyword(b"channel")?; self.consume_whitespace(true)?; @@ -1309,7 +1305,7 @@ impl Lexer<'_> { next: None, })) } - fn consume_memory_statement(&mut self, h: &mut Heap) -> Result { + fn consume_memory_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let type_annotation = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; @@ -1332,7 +1328,7 @@ impl Lexer<'_> { fn consume_labeled_statement( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); let label = self.consume_identifier(h)?; self.consume_whitespace(false)?; @@ -1347,14 +1343,14 @@ impl Lexer<'_> { in_sync: None, })) } - fn consume_skip_statement(&mut self, h: &mut Heap) -> Result { + fn consume_skip_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"skip")?; self.consume_whitespace(false)?; self.consume_string(b";")?; Ok(h.alloc_skip_statement(|this| SkipStatement { this, position, next: None })) } - fn consume_if_statement(&mut self, h: &mut Heap) -> Result { + fn consume_if_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"if")?; self.consume_whitespace(false)?; @@ -1371,7 +1367,7 @@ impl Lexer<'_> { }; Ok(h.alloc_if_statement(|this| IfStatement { this, position, test, true_body, false_body })) } - fn consume_while_statement(&mut self, h: &mut Heap) -> Result { + fn consume_while_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"while")?; self.consume_whitespace(false)?; @@ -1387,7 +1383,7 @@ impl Lexer<'_> { in_sync: None, })) } - fn consume_break_statement(&mut self, h: &mut Heap) -> Result { + fn consume_break_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"break")?; self.consume_whitespace(false)?; @@ -1404,7 +1400,7 @@ impl Lexer<'_> { fn consume_continue_statement( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); self.consume_keyword(b"continue")?; self.consume_whitespace(false)?; @@ -1426,7 +1422,7 @@ impl Lexer<'_> { fn consume_synchronous_statement( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); self.consume_keyword(b"synchronous")?; self.consume_whitespace(false)?; @@ -1435,7 +1431,7 @@ impl Lexer<'_> { self.consume_parameters(h, &mut parameters)?; self.consume_whitespace(false)?; } else if !self.has_keyword(b"skip") && !self.has_string(b"{") { - return Err(self.source.error("Expected block statement")); + return Err(self.error_at_pos("Expected block statement")); } let body = self.consume_statement(h)?; Ok(h.alloc_synchronous_statement(|this| SynchronousStatement { @@ -1446,7 +1442,7 @@ impl Lexer<'_> { parent_scope: None, })) } - fn consume_return_statement(&mut self, h: &mut Heap) -> Result { + fn consume_return_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"return")?; self.consume_whitespace(false)?; @@ -1459,7 +1455,7 @@ impl Lexer<'_> { self.consume_string(b";")?; Ok(h.alloc_return_statement(|this| ReturnStatement { this, position, expression })) } - fn consume_assert_statement(&mut self, h: &mut Heap) -> Result { + fn consume_assert_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"assert")?; self.consume_whitespace(false)?; @@ -1477,7 +1473,7 @@ impl Lexer<'_> { next: None, })) } - fn consume_goto_statement(&mut self, h: &mut Heap) -> Result { + fn consume_goto_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"goto")?; self.consume_whitespace(false)?; @@ -1486,7 +1482,7 @@ impl Lexer<'_> { self.consume_string(b";")?; Ok(h.alloc_goto_statement(|this| GotoStatement { this, position, label, target: None })) } - fn consume_new_statement(&mut self, h: &mut Heap) -> Result { + fn consume_new_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"new")?; self.consume_whitespace(false)?; @@ -1495,7 +1491,7 @@ impl Lexer<'_> { self.consume_string(b";")?; Ok(h.alloc_new_statement(|this| NewStatement { this, position, expression, next: None })) } - fn consume_put_statement(&mut self, h: &mut Heap) -> Result { + fn consume_put_statement(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"put")?; self.consume_whitespace(false)?; @@ -1514,7 +1510,7 @@ impl Lexer<'_> { fn consume_expression_statement( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); let expression = self.consume_expression(h)?; self.consume_whitespace(false)?; @@ -1537,21 +1533,165 @@ impl Lexer<'_> { || self.has_type_keyword() || self.has_identifier() } - fn consume_symbol_definition(&mut self, h: &mut Heap) -> Result { - if self.has_keyword(b"composite") || self.has_keyword(b"primitive") { + fn consume_symbol_definition(&mut self, h: &mut Heap) -> Result { + if self.has_keyword(b"struct") { + Ok(self.consume_struct_definition(h)?.upcast()) + } else if self.has_keyword(b"enum") { + Ok(self.consume_enum_definition(h)?.upcast()) + } else if self.has_keyword(b"composite") || self.has_keyword(b"primitive") { Ok(self.consume_component_definition(h)?.upcast()) } else { Ok(self.consume_function_definition(h)?.upcast()) } } - fn consume_component_definition(&mut self, h: &mut Heap) -> Result { + fn consume_struct_definition(&mut self, h: &mut Heap) -> Result { + // Parse "struct" keyword and its identifier + let struct_pos = self.source.pos(); + self.consume_keyword(b"struct")?; + self.consume_whitespace(true)?; + let struct_ident = self.consume_identifier(h)?; + self.consume_whitespace(false)?; + + // Parse struct fields + self.consume_string(b"{")?; + let mut next = self.source.next(); + let mut fields = Vec::new(); + while next.is_some() { + let char = next.unwrap(); + if char == b'}' { + break; + } + + // Consume field definition + self.consume_whitespace(false)?; + let field_position = self.source.pos(); + let field_type = self.consume_type_annotation(h)?; + self.consume_whitespace(true)?; + let field_ident = self.consume_identifier(h)?; + self.consume_whitespace(false)?; + + fields.push(StructFieldDefinition{ + position: field_position, + field: field_ident, + the_type: field_type, + }); + + // If we have a comma, then we may or may not have another field + // definition. Otherwise we expect the struct to be fully defined + // and expect a closing brace + next = self.source.next(); + if let Some(b',') = next { + self.source.consume(); + self.consume_whitespace(false)?; + next = self.source.next(); + } else { + break; + } + } + + // End of struct definition, so we expect a closing brace + self.consume_string(b"}")?; + + // Valid struct definition + Ok(h.alloc_struct_definition(|this| StructDefinition{ + this, + position: struct_pos, + identifier: struct_ident, + fields, + })) + } + fn consume_enum_definition(&mut self, h: &mut Heap) -> Result { + // Parse "enum" keyword and its identifier + let enum_pos = self.source.pos(); + self.consume_keyword(b"enum")?; + self.consume_whitespace(true)?; + let enum_ident = self.consume_identifier(h)?; + self.consume_whitespace(false)?; + + // Parse enum variants + self.consume_string(b"{")?; + let mut next = self.source.next(); + let mut variants = Vec::new(); + while next.is_some() { + let char = next.unwrap(); + if char == b'}' { + break; + } + + // Consume variant identifier + self.consume_whitespace(false)?; + let variant_position = self.source.pos(); + let variant_ident = self.consume_identifier(h)?; + self.consume_whitespace(false)?; + + // Consume variant (tag) value: may be nothing, in which case it is + // assigned automatically, may be a constant integer, or an embedded + // type as value, resulting in a tagged union + next = self.source.next(); + let variant_value = if let Some(b',') = next { + EnumVariantValue::None + } else if let Some(b'=') = next { + self.source.consume(); + self.consume_whitespace(false)?; + if !self.has_integer() { + return Err(self.error_at_pos("expected integer")); + } + let variant_int = self.consume_integer()?; + self.consume_whitespace(false)?; + EnumVariantValue::Integer(variant_int) + } else if let Some(b'(') = next { + self.source.consume(); + self.consume_whitespace(false)?; + let variant_type = self.consume_type_annotation(h)?; + self.consume_whitespace(false)?; + self.consume_string(b")")?; + self.consume_whitespace(false)?; + EnumVariantValue::Type(variant_type) + } else { + return Err(self.error_at_pos("expected ',', '=', or '('")); + }; + + variants.push(EnumVariantDefinition{ + position: variant_position, + identifier: variant_ident, + value: variant_value + }); + + // If we have a comma, then we may or may not have another variant, + // otherwise we expect the enum is fully defined + next = self.source.next(); + if let Some(b',') = next { + self.source.consume(); + self.consume_whitespace(false)?; + next = self.source.next(); + } else { + break; + } + } + + self.consume_string(b"}")?; + + // An enum without variants is somewhat valid, but completely useless + // within the language + if variants.is_empty() { + return Err(ParseError2::new_error(self.source, enum_pos, "enum definition without variants")); + } + + Ok(h.alloc_enum_definition(|this| EnumDefinition{ + this, + position: enum_pos, + identifier: enum_ident, + variants, + })) + } + fn consume_component_definition(&mut self, h: &mut Heap) -> Result { if self.has_keyword(b"composite") { Ok(self.consume_composite_definition(h)?.upcast()) } else { Ok(self.consume_primitive_definition(h)?.upcast()) } } - fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { + fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"composite")?; self.consume_whitespace(true)?; @@ -1563,7 +1703,7 @@ impl Lexer<'_> { let body = self.consume_block_statement(h)?; Ok(h.alloc_composite(|this| Composite { this, position, identifier, parameters, body })) } - fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { + fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); self.consume_keyword(b"primitive")?; self.consume_whitespace(true)?; @@ -1575,7 +1715,7 @@ impl Lexer<'_> { let body = self.consume_block_statement(h)?; Ok(h.alloc_primitive(|this| Primitive { this, position, identifier, parameters, body })) } - fn consume_function_definition(&mut self, h: &mut Heap) -> Result { + fn consume_function_definition(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let return_type = self.consume_type_annotation(h)?; self.consume_whitespace(true)?; @@ -1601,21 +1741,21 @@ impl Lexer<'_> { false } } - fn consume_pragma(&mut self, h: &mut Heap) -> Result { + fn consume_pragma(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let next = self.source.next(); if next != Some(b'#') { - return Err(self.source.error("Expected pragma")); + return Err(self.error_at_pos("Expected pragma")); } self.source.consume(); if !is_vchar(self.source.next()) { - return Err(self.source.error("Expected pragma")); + return Err(self.error_at_pos("Expected pragma")); } if self.has_string(b"version") { self.consume_string(b"version")?; self.consume_whitespace(true)?; if !self.has_integer() { - return Err(self.source.error("Expected integer constant")); + return Err(self.error_at_pos("Expected integer constant")); } let version = self.consume_integer()?; debug_assert!(version >= 0); @@ -1626,7 +1766,7 @@ impl Lexer<'_> { self.consume_string(b"module")?; self.consume_whitespace(true)?; if !self.has_identifier() { - return Err(self.source.error("Expected identifier")); + return Err(self.error_at_pos("Expected identifier")); } let mut value = Vec::new(); let mut ident = self.consume_ident()?; @@ -1641,30 +1781,152 @@ impl Lexer<'_> { this, position, value }))); } else { - return Err(self.source.error("Unknown pragma")); + return Err(self.error_at_pos("Unknown pragma")); } } + fn has_import(&self) -> bool { self.has_keyword(b"import") } - fn consume_import(&mut self, h: &mut Heap) -> Result { + fn consume_import(&mut self, h: &mut Heap) -> Result { + // Parse the word "import" and the name of the module let position = self.source.pos(); self.consume_keyword(b"import")?; self.consume_whitespace(true)?; let mut value = Vec::new(); let mut ident = self.consume_ident()?; value.append(&mut ident); + let mut last_ident_start = 0; + while self.has_string(b".") { self.consume_string(b".")?; value.push(b'.'); ident = self.consume_ident()?; + last_ident_start = value.len(); value.append(&mut ident); } + + + self.consume_whitespace(false)?; + + // Check for the potential aliasing or specific module imports + let import = if self.has_string(b"as") { + self.consume_string(b"as")?; + self.consume_whitespace(true)?; + let alias = self.consume_ident()?; + + h.alloc_import(|this| Import::Module(ImportModule{ + this, + position, + module_name: value, + alias, + module_id: None, + })) + } else if self.has_string(b"::") { + self.consume_string(b"::")?; + self.consume_whitespace(false)?; + + if let Some(b'{') = self.source.next() { + // Import specific symbols, optionally with an alias + self.source.consume(); + self.consume_whitespace(false)?; + + let mut symbols = Vec::new(); + let mut next = self.source.next(); + + while next.is_some() { + let char = next.unwrap(); + if char == b'}' { + break; + } + + let symbol_position = self.source.pos(); + let symbol_name = self.consume_ident()?; + self.consume_whitespace(false)?; + if self.has_string(b"as") { + // Symbol has an alias + self.consume_string(b"as")?; + self.consume_whitespace(true)?; + let symbol_alias = self.consume_ident()?; + + symbols.push(AliasedSymbol{ + position: symbol_position, + name: symbol_name, + alias: symbol_alias, + definition_id: None, + }); + } else { + // Symbol does not have an alias + symbols.push(AliasedSymbol{ + position: symbol_position, + name: symbol_name.clone(), + alias: symbol_name, + definition_id: None, + }); + } + + // A comma indicates that we may have another symbol coming + // up (not necessary), but if not present then we expect the + // end of the symbol list + self.consume_whitespace(false)?; + + next = self.source.next(); + if let Some(b',') = next { + self.source.consume(); + self.consume_whitespace(false)?; + next = self.source.next(); + } else { + break; + } + } + + if let Some(b'}') = next { + // We are fine, push the imported symbols + self.source.consume(); + if symbols.is_empty() { + return Err(ParseError2::new_error(self.source, position, "empty symbol import list")); + } + + h.alloc_import(|this| Import::Symbols(ImportSymbols{ + this, + position, + module_name: value, + module_id: None, + symbols, + })) + } else { + return Err(self.error_at_pos("Expected '}'")); + } + } else if let Some(b'*') = self.source.next() { + // Import all symbols without alias + self.source.consume(); + h.alloc_import(|this| Import::Symbols(ImportSymbols{ + this, + position, + module_name: value, + module_id: None, + symbols: Vec::new() + })) + } else { + return Err(self.error_at_pos("Expected '*' or '{'")); + } + } else { + // No explicit alias or subimports, so implicit alias + let alias = Vec::from(&value[last_ident_start..]); + h.alloc_import(|this| Import::Module(ImportModule{ + this, + position, + module_name: value, + alias, + module_id: None, + })) + }; + self.consume_whitespace(false)?; self.consume_string(b";")?; - Ok(h.alloc_import(|this| Import { this, position, value })) + Ok(import) } - pub fn consume_protocol_description(&mut self, h: &mut Heap) -> Result { + pub fn consume_protocol_description(&mut self, h: &mut Heap) -> Result { let position = self.source.pos(); let mut pragmas = Vec::new(); let mut imports = Vec::new(); @@ -1680,16 +1942,14 @@ impl Lexer<'_> { imports.push(import); self.consume_whitespace(false)?; } - // do-while block - while { + while self.has_symbol_definition() { let def = self.consume_symbol_definition(h)?; definitions.push(def); self.consume_whitespace(false)?; - self.has_symbol_definition() - } {} + } // end of file if !self.source.is_eof() { - return Err(self.source.error("Expected end of file")); + return Err(self.error_at_pos("Expected end of file")); } Ok(h.alloc_protocol_description(|this| Root { this, @@ -1704,8 +1964,9 @@ impl Lexer<'_> { #[cfg(test)] mod tests { - use crate::protocol::ast::Expression::*; + use crate::protocol::ast::*; use crate::protocol::{ast, lexer::*}; + use crate::protocol::inputsource::*; #[test] fn test_pragmas() { @@ -1730,10 +1991,171 @@ mod tests { if let Pragma::Module(m) = pm { assert_eq!(m.value, b"something.dot.separated"); } else { - assert!(false, "second pragma not version"); + assert!(false, "second pragma not module"); } } + #[test] + fn test_import() { + let mut h = Heap::new(); + let mut input = InputSource::from_string(" + // Module imports, with optional and explicit aliasing + import single_module; + import std.reo; + import something.other as alias; + // Symbol imports + import some_module::*; + import some_module::{Foo as Bar, Qux, Dix as Flu}; + import std.reo::{ + Foo as Bar, // because thing + Qux as Mox, // more explanations + Dix, /* yesh, import me */ + }; + ").unwrap(); + let mut lex = Lexer::new(&mut input); + let lexed = lex.consume_protocol_description(&mut h).unwrap(); + let root = &h[lexed]; + assert_eq!(root.imports.len(), 6); + let no_alias_single = h[root.imports[0]].as_module(); + let no_alias_multi = h[root.imports[1]].as_module(); + let with_alias = h[root.imports[2]].as_module(); + + assert_eq!(no_alias_single.module_name, b"single_module"); + assert_eq!(no_alias_single.alias, b"single_module"); + assert_eq!(no_alias_multi.module_name, b"std.reo"); + assert_eq!(no_alias_multi.alias, b"reo"); + assert_eq!(with_alias.module_name, b"something.other"); + assert_eq!(with_alias.alias, b"alias"); + + let all_symbols = h[root.imports[3]].as_symbols(); + let single_line_symbols = h[root.imports[4]].as_symbols(); + let multi_line_symbols = h[root.imports[5]].as_symbols(); + + assert_eq!(all_symbols.module_name, b"some_module"); + assert!(all_symbols.symbols.is_empty()); + assert_eq!(single_line_symbols.module_name, b"some_module"); + assert_eq!(single_line_symbols.symbols.len(), 3); + assert_eq!(single_line_symbols.symbols[0].name, b"Foo"); + assert_eq!(single_line_symbols.symbols[0].alias, b"Bar"); + assert_eq!(single_line_symbols.symbols[1].name, b"Qux"); + assert_eq!(single_line_symbols.symbols[1].alias, b"Qux"); + assert_eq!(single_line_symbols.symbols[2].name, b"Dix"); + assert_eq!(single_line_symbols.symbols[2].alias, b"Flu"); + assert_eq!(multi_line_symbols.module_name, b"std.reo"); + assert_eq!(multi_line_symbols.symbols.len(), 3); + assert_eq!(multi_line_symbols.symbols[0].name, b"Foo"); + assert_eq!(multi_line_symbols.symbols[0].alias, b"Bar"); + assert_eq!(multi_line_symbols.symbols[1].name, b"Qux"); + assert_eq!(multi_line_symbols.symbols[1].alias, b"Mox"); + assert_eq!(multi_line_symbols.symbols[2].name, b"Dix"); + assert_eq!(multi_line_symbols.symbols[2].alias, b"Dix"); + } + + #[test] + fn test_struct_definition() { + let mut h = Heap::new(); + let mut input = InputSource::from_string(" + struct Foo { + byte one, + short two, + Bar three, + } + struct Bar{int[] one, int[] two, Qux[] three} + ").unwrap(); + let mut lex = Lexer::new(&mut input); + let lexed = lex.consume_protocol_description(&mut h); + if let Err(err) = &lexed { + println!("{}", err); + } + let lexed = lexed.unwrap(); + let root = &h[lexed]; + + assert_eq!(root.definitions.len(), 2); + + let foo_def = h[root.definitions[0]].as_struct(); + assert_eq!(foo_def.identifier.value, b"Foo"); + assert_eq!(foo_def.fields.len(), 3); + assert_eq!(foo_def.fields[0].field.value, b"one"); + assert_eq!(h[foo_def.fields[0].the_type].the_type, Type::BYTE); + assert_eq!(foo_def.fields[1].field.value, b"two"); + assert_eq!(h[foo_def.fields[1].the_type].the_type, Type::SHORT); + assert_eq!(foo_def.fields[2].field.value, b"three"); + assert_eq!(h[foo_def.fields[2].the_type].the_type.primitive, PrimitiveType::Symbolic(Vec::from("Bar".as_bytes()))); + + let bar_def = h[root.definitions[1]].as_struct(); + assert_eq!(bar_def.identifier.value, b"Bar"); + assert_eq!(bar_def.fields.len(), 3); + assert_eq!(bar_def.fields[0].field.value, b"one"); + assert_eq!(h[bar_def.fields[0].the_type].the_type, Type::INT_ARRAY); + assert_eq!(bar_def.fields[1].field.value, b"two"); + assert_eq!(h[bar_def.fields[1].the_type].the_type, Type::INT_ARRAY); + assert_eq!(bar_def.fields[2].field.value, b"three"); + assert_eq!(h[bar_def.fields[2].the_type].the_type.array, true); + assert_eq!(h[bar_def.fields[2].the_type].the_type.primitive, PrimitiveType::Symbolic(Vec::from("Qux".as_bytes()))); + } + + #[test] + fn test_enum_definition() { + let mut h = Heap::new(); + let mut input = InputSource::from_string(" + enum Foo { + A = 0, + B = 5, + C, + D = 0xFF, + } + enum Bar { Ayoo, Byoo, Cyoo,} + enum Qux { A(byte[]), B(Bar[]), C(byte) + } + ").unwrap(); + let mut lex = Lexer::new(&mut input); + let lexed = lex.consume_protocol_description(&mut h).unwrap(); + let root = &h[lexed]; + + assert_eq!(root.definitions.len(), 3); + + let foo_def = h[root.definitions[0]].as_enum(); + assert_eq!(foo_def.identifier.value, b"Foo"); + assert_eq!(foo_def.variants.len(), 4); + assert_eq!(foo_def.variants[0].identifier.value, b"A"); + assert_eq!(foo_def.variants[0].value, EnumVariantValue::Integer(0)); + assert_eq!(foo_def.variants[1].identifier.value, b"B"); + assert_eq!(foo_def.variants[1].value, EnumVariantValue::Integer(5)); + assert_eq!(foo_def.variants[2].identifier.value, b"C"); + assert_eq!(foo_def.variants[2].value, EnumVariantValue::None); + assert_eq!(foo_def.variants[3].identifier.value, b"D"); + assert_eq!(foo_def.variants[3].value, EnumVariantValue::Integer(0xFF)); + + let bar_def = h[root.definitions[1]].as_enum(); + assert_eq!(bar_def.identifier.value, b"Bar"); + assert_eq!(bar_def.variants.len(), 3); + assert_eq!(bar_def.variants[0].identifier.value, b"Ayoo"); + assert_eq!(bar_def.variants[0].value, EnumVariantValue::None); + assert_eq!(bar_def.variants[1].identifier.value, b"Byoo"); + assert_eq!(bar_def.variants[1].value, EnumVariantValue::None); + assert_eq!(bar_def.variants[2].identifier.value, b"Cyoo"); + assert_eq!(bar_def.variants[2].value, EnumVariantValue::None); + + let qux_def = h[root.definitions[2]].as_enum(); + let enum_type = |value: &EnumVariantValue| -> &TypeAnnotation { + if let EnumVariantValue::Type(t) = value { + &h[*t] + } else { + assert!(false); + unreachable!(); + } + }; + assert_eq!(qux_def.identifier.value, b"Qux"); + assert_eq!(qux_def.variants.len(), 3); + assert_eq!(qux_def.variants[0].identifier.value, b"A"); + assert_eq!(enum_type(&qux_def.variants[0].value).the_type, Type::BYTE_ARRAY); + assert_eq!(qux_def.variants[1].identifier.value, b"B"); + assert_eq!(enum_type(&qux_def.variants[1].value).the_type.array, true); + assert_eq!(enum_type(&qux_def.variants[1].value).the_type.primitive, PrimitiveType::Symbolic(Vec::from("Bar".as_bytes()))); + assert_eq!(qux_def.variants[2].identifier.value, b"C"); + assert_eq!(enum_type(&qux_def.variants[2].value).the_type, Type::BYTE); + } + // #[test] // fn test_lowercase() { // assert_eq!(lowercase(b'a'), b'a'); diff --git a/src/protocol/library.rs b/src/protocol/library.rs index e2926aefbf7c683a9e5c8c34087303f611d82315..76a099e4233301533a7d53d83829181f26c2a02f 100644 --- a/src/protocol/library.rs +++ b/src/protocol/library.rs @@ -3,62 +3,63 @@ use crate::protocol::inputsource::*; // TABBED OUT FOR NOW -pub fn get_declarations(h: &mut Heap, i: ImportId) -> Result, ParseError> { - if h[i].value == b"std.reo" { - let mut vec = Vec::new(); - vec.push(cd(h, i, b"sync", &[Type::INPUT, Type::OUTPUT])); - vec.push(cd(h, i, b"syncdrain", &[Type::INPUT, Type::INPUT])); - vec.push(cd(h, i, b"syncspout", &[Type::OUTPUT, Type::OUTPUT])); - vec.push(cd(h, i, b"asyncdrain", &[Type::INPUT, Type::INPUT])); - vec.push(cd(h, i, b"asyncspout", &[Type::OUTPUT, Type::OUTPUT])); - vec.push(cd(h, i, b"merger", &[Type::INPUT_ARRAY, Type::OUTPUT])); - vec.push(cd(h, i, b"router", &[Type::INPUT, Type::OUTPUT_ARRAY])); - vec.push(cd(h, i, b"consensus", &[Type::INPUT_ARRAY, Type::OUTPUT])); - vec.push(cd(h, i, b"replicator", &[Type::INPUT, Type::OUTPUT_ARRAY])); - vec.push(cd(h, i, b"alternator", &[Type::INPUT_ARRAY, Type::OUTPUT])); - vec.push(cd(h, i, b"roundrobin", &[Type::INPUT, Type::OUTPUT_ARRAY])); - vec.push(cd(h, i, b"node", &[Type::INPUT_ARRAY, Type::OUTPUT_ARRAY])); - vec.push(cd(h, i, b"fifo", &[Type::INPUT, Type::OUTPUT])); - vec.push(cd(h, i, b"xfifo", &[Type::INPUT, Type::OUTPUT, Type::MESSAGE])); - vec.push(cd(h, i, b"nfifo", &[Type::INPUT, Type::OUTPUT, Type::INT])); - vec.push(cd(h, i, b"ufifo", &[Type::INPUT, Type::OUTPUT])); - Ok(vec) - } else if h[i].value == b"std.buf" { - let mut vec = Vec::new(); - vec.push(fd(h, i, b"writeByte", Type::BYTE, &[Type::MESSAGE, Type::INT, Type::BYTE])); - vec.push(fd(h, i, b"writeShort", Type::SHORT, &[Type::MESSAGE, Type::INT, Type::SHORT])); - vec.push(fd(h, i, b"writeInt", Type::INT, &[Type::MESSAGE, Type::INT, Type::INT])); - vec.push(fd(h, i, b"writeLong", Type::LONG, &[Type::MESSAGE, Type::INT, Type::LONG])); - vec.push(fd(h, i, b"readByte", Type::BYTE, &[Type::MESSAGE, Type::INT])); - vec.push(fd(h, i, b"readShort", Type::SHORT, &[Type::MESSAGE, Type::INT])); - vec.push(fd(h, i, b"readInt", Type::INT, &[Type::MESSAGE, Type::INT])); - vec.push(fd(h, i, b"readLong", Type::LONG, &[Type::MESSAGE, Type::INT])); - Ok(vec) - } else { - Err(ParseError::new(h[i].position, "Unknown import")) - } -} - -fn cd(h: &mut Heap, import: ImportId, ident: &[u8], sig: &[Type]) -> DeclarationId { - let identifier = h.get_external_identifier(ident).upcast(); - h.alloc_imported_declaration(|this| ImportedDeclaration { - this, - import, - signature: Signature::Component(ComponentSignature { identifier, arity: sig.to_vec() }), - }) - .upcast() -} - -fn fd(h: &mut Heap, import: ImportId, ident: &[u8], ret: Type, sig: &[Type]) -> DeclarationId { - let identifier = h.get_external_identifier(ident).upcast(); - h.alloc_imported_declaration(|this| ImportedDeclaration { - this, - import, - signature: Signature::Function(FunctionSignature { - return_type: ret, - identifier, - arity: sig.to_vec(), - }), - }) - .upcast() +pub fn get_declarations(h: &mut Heap, i: ImportId) -> Result, ParseError2> { + todo!("implement me"); + // if h[i].value == b"std.reo" { + // let mut vec = Vec::new(); + // vec.push(cd(h, i, b"sync", &[Type::INPUT, Type::OUTPUT])); + // vec.push(cd(h, i, b"syncdrain", &[Type::INPUT, Type::INPUT])); + // vec.push(cd(h, i, b"syncspout", &[Type::OUTPUT, Type::OUTPUT])); + // vec.push(cd(h, i, b"asyncdrain", &[Type::INPUT, Type::INPUT])); + // vec.push(cd(h, i, b"asyncspout", &[Type::OUTPUT, Type::OUTPUT])); + // vec.push(cd(h, i, b"merger", &[Type::INPUT_ARRAY, Type::OUTPUT])); + // vec.push(cd(h, i, b"router", &[Type::INPUT, Type::OUTPUT_ARRAY])); + // vec.push(cd(h, i, b"consensus", &[Type::INPUT_ARRAY, Type::OUTPUT])); + // vec.push(cd(h, i, b"replicator", &[Type::INPUT, Type::OUTPUT_ARRAY])); + // vec.push(cd(h, i, b"alternator", &[Type::INPUT_ARRAY, Type::OUTPUT])); + // vec.push(cd(h, i, b"roundrobin", &[Type::INPUT, Type::OUTPUT_ARRAY])); + // vec.push(cd(h, i, b"node", &[Type::INPUT_ARRAY, Type::OUTPUT_ARRAY])); + // vec.push(cd(h, i, b"fifo", &[Type::INPUT, Type::OUTPUT])); + // vec.push(cd(h, i, b"xfifo", &[Type::INPUT, Type::OUTPUT, Type::MESSAGE])); + // vec.push(cd(h, i, b"nfifo", &[Type::INPUT, Type::OUTPUT, Type::INT])); + // vec.push(cd(h, i, b"ufifo", &[Type::INPUT, Type::OUTPUT])); + // Ok(vec) + // } else if h[i].value == b"std.buf" { + // let mut vec = Vec::new(); + // vec.push(fd(h, i, b"writeByte", Type::BYTE, &[Type::MESSAGE, Type::INT, Type::BYTE])); + // vec.push(fd(h, i, b"writeShort", Type::SHORT, &[Type::MESSAGE, Type::INT, Type::SHORT])); + // vec.push(fd(h, i, b"writeInt", Type::INT, &[Type::MESSAGE, Type::INT, Type::INT])); + // vec.push(fd(h, i, b"writeLong", Type::LONG, &[Type::MESSAGE, Type::INT, Type::LONG])); + // vec.push(fd(h, i, b"readByte", Type::BYTE, &[Type::MESSAGE, Type::INT])); + // vec.push(fd(h, i, b"readShort", Type::SHORT, &[Type::MESSAGE, Type::INT])); + // vec.push(fd(h, i, b"readInt", Type::INT, &[Type::MESSAGE, Type::INT])); + // vec.push(fd(h, i, b"readLong", Type::LONG, &[Type::MESSAGE, Type::INT])); + // Ok(vec) + // } else { + // Err(ParseError::new(h[i].position, "Unknown import")) + // } } +// +// fn cd(h: &mut Heap, import: ImportId, ident: &[u8], sig: &[Type]) -> DeclarationId { +// let identifier = h.get_external_identifier(ident).upcast(); +// h.alloc_imported_declaration(|this| ImportedDeclaration { +// this, +// import, +// signature: Signature::Component(ComponentSignature { identifier, arity: sig.to_vec() }), +// }) +// .upcast() +// } +// +// fn fd(h: &mut Heap, import: ImportId, ident: &[u8], ret: Type, sig: &[Type]) -> DeclarationId { +// let identifier = h.get_external_identifier(ident).upcast(); +// h.alloc_imported_declaration(|this| ImportedDeclaration { +// this, +// import, +// signature: Signature::Function(FunctionSignature { +// return_type: ret, +// identifier, +// arity: sig.to_vec(), +// }), +// }) +// .upcast() +// } diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index 1f368eea38d029456c89ac98ac19e4e4f8029e29..42a3c0b002598c23ce55f4bd5ef548ea59ec414c 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -5,6 +5,7 @@ pub(crate) mod inputsource; // mod lexer; mod library; mod parser; +mod containers; // TODO: Remove when not benchmarking pub(crate) mod ast; @@ -51,17 +52,17 @@ impl std::fmt::Debug for ProtocolDescription { } impl ProtocolDescription { pub fn parse(buffer: &[u8]) -> Result { - let mut heap = Heap::new(); - let mut source = InputSource::from_buffer(buffer).unwrap(); - let mut parser = Parser::new(&mut source); - match parser.parse(&mut heap) { + // TODO: @fixme, keep code compilable, but needs support for multiple + // input files. + let source = InputSource::from_buffer(buffer).unwrap(); + let mut parser = Parser::new(); + parser.feed(source).expect("failed to parse source"); + match parser.parse() { Ok(root) => { - return Ok(ProtocolDescription { heap, source, root }); + return Ok(ProtocolDescription { heap: parser.heap, source: parser.modules[0].source.clone(), root }); } Err(err) => { - let mut vec: Vec = Vec::new(); - err.write(&source, &mut vec).unwrap(); - Err(String::from_utf8_lossy(&vec).to_string()) + Err(format!("{}", err)) } } } diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..33452fcb8aac76ca1d17458a603226e0692c5eb6 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -0,0 +1,1807 @@ +use crate::protocol::ast::*; +use crate::protocol::inputsource::*; +use crate::protocol::library; + +// The following indirection is needed due to a bug in the cbindgen tool. +type Unit = (); +pub(crate) type VisitorError = (InputPosition, String); // TODO: Revise when multi-file compiling is in place +pub(crate) type VisitorResult = Result; + +pub(crate) trait Visitor: Sized { + fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { + recursive_protocol_description(self, h, pd) + } + fn visit_pragma(&mut self, _h: &mut Heap, _pragma: PragmaId) -> VisitorResult { + Ok(()) + } + fn visit_import(&mut self, _h: &mut Heap, _import: ImportId) -> VisitorResult { + Ok(()) + } + + fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { + recursive_symbol_definition(self, h, def) + } + fn visit_struct_definition(&mut self, h: &mut Heap, def: StructId) -> VisitorResult { + Ok(()) + } + fn visit_enum_definition(&mut self, h: &mut Heap, def: EnumId) -> VisitorResult { + Ok(()) + } + fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + recursive_component_definition(self, h, def) + } + fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + recursive_composite_definition(self, h, def) + } + fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + recursive_primitive_definition(self, h, def) + } + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + recursive_function_definition(self, h, def) + } + + fn visit_variable_declaration(&mut self, h: &mut Heap, decl: VariableId) -> VisitorResult { + recursive_variable_declaration(self, h, decl) + } + fn visit_parameter_declaration(&mut self, _h: &mut Heap, _decl: ParameterId) -> VisitorResult { + Ok(()) + } + fn visit_local_declaration(&mut self, _h: &mut Heap, _decl: LocalId) -> VisitorResult { + Ok(()) + } + + fn visit_statement(&mut self, h: &mut Heap, stmt: StatementId) -> VisitorResult { + recursive_statement(self, h, stmt) + } + fn visit_local_statement(&mut self, h: &mut Heap, stmt: LocalStatementId) -> VisitorResult { + recursive_local_statement(self, h, stmt) + } + fn visit_memory_statement(&mut self, h: &mut Heap, stmt: MemoryStatementId) -> VisitorResult { + recursive_memory_statement(self, h, stmt) + } + fn visit_channel_statement( + &mut self, + _h: &mut Heap, + _stmt: ChannelStatementId, + ) -> VisitorResult { + Ok(()) + } + fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { + recursive_block_statement(self, h, stmt) + } + fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { + recursive_labeled_statement(self, h, stmt) + } + fn visit_skip_statement(&mut self, _h: &mut Heap, _stmt: SkipStatementId) -> VisitorResult { + Ok(()) + } + fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { + recursive_if_statement(self, h, stmt) + } + fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { + recursive_while_statement(self, h, stmt) + } + fn visit_break_statement(&mut self, _h: &mut Heap, _stmt: BreakStatementId) -> VisitorResult { + Ok(()) + } + fn visit_continue_statement( + &mut self, + _h: &mut Heap, + _stmt: ContinueStatementId, + ) -> VisitorResult { + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + recursive_synchronous_statement(self, h, stmt) + } + fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { + recursive_return_statement(self, h, stmt) + } + fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { + recursive_assert_statement(self, h, stmt) + } + fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { + Ok(()) + } + fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + recursive_new_statement(self, h, stmt) + } + fn visit_put_statement(&mut self, h: &mut Heap, stmt: PutStatementId) -> VisitorResult { + recursive_put_statement(self, h, stmt) + } + fn visit_expression_statement( + &mut self, + h: &mut Heap, + stmt: ExpressionStatementId, + ) -> VisitorResult { + recursive_expression_statement(self, h, stmt) + } + + fn visit_expression(&mut self, h: &mut Heap, expr: ExpressionId) -> VisitorResult { + recursive_expression(self, h, expr) + } + fn visit_assignment_expression( + &mut self, + h: &mut Heap, + expr: AssignmentExpressionId, + ) -> VisitorResult { + recursive_assignment_expression(self, h, expr) + } + fn visit_conditional_expression( + &mut self, + h: &mut Heap, + expr: ConditionalExpressionId, + ) -> VisitorResult { + recursive_conditional_expression(self, h, expr) + } + fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { + recursive_binary_expression(self, h, expr) + } + fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { + recursive_unary_expression(self, h, expr) + } + fn visit_indexing_expression( + &mut self, + h: &mut Heap, + expr: IndexingExpressionId, + ) -> VisitorResult { + recursive_indexing_expression(self, h, expr) + } + fn visit_slicing_expression( + &mut self, + h: &mut Heap, + expr: SlicingExpressionId, + ) -> VisitorResult { + recursive_slicing_expression(self, h, expr) + } + fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { + recursive_select_expression(self, h, expr) + } + fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { + recursive_array_expression(self, h, expr) + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + recursive_call_expression(self, h, expr) + } + fn visit_constant_expression( + &mut self, + _h: &mut Heap, + _expr: ConstantExpressionId, + ) -> VisitorResult { + Ok(()) + } + fn visit_variable_expression( + &mut self, + _h: &mut Heap, + _expr: VariableExpressionId, + ) -> VisitorResult { + Ok(()) + } +} + +// Bubble-up helpers +fn recursive_parameter_as_variable( + this: &mut T, + h: &mut Heap, + param: ParameterId, +) -> VisitorResult { + this.visit_variable_declaration(h, param.upcast()) +} + +fn recursive_local_as_variable( + this: &mut T, + h: &mut Heap, + local: LocalId, +) -> VisitorResult { + this.visit_variable_declaration(h, local.upcast()) +} + +fn recursive_call_expression_as_expression( + this: &mut T, + h: &mut Heap, + call: CallExpressionId, +) -> VisitorResult { + this.visit_expression(h, call.upcast()) +} + +// Recursive procedures +fn recursive_protocol_description( + this: &mut T, + h: &mut Heap, + pd: RootId, +) -> VisitorResult { + for &pragma in h[pd].pragmas.clone().iter() { + this.visit_pragma(h, pragma)?; + } + for &import in h[pd].imports.clone().iter() { + this.visit_import(h, import)?; + } + for &def in h[pd].definitions.clone().iter() { + this.visit_symbol_definition(h, def)?; + } + Ok(()) +} + +fn recursive_symbol_definition( + this: &mut T, + h: &mut Heap, + def: DefinitionId, +) -> VisitorResult { + // We clone the definition in case it is modified + // TODO: Fix me + match h[def].clone() { + Definition::Struct(def) => this.visit_struct_definition(h, def.this), + Definition::Enum(def) => this.visit_enum_definition(h, def.this), + Definition::Component(cdef) => this.visit_component_definition(h, cdef.this()), + Definition::Function(fdef) => this.visit_function_definition(h, fdef.this), + } +} + +fn recursive_component_definition( + this: &mut T, + h: &mut Heap, + def: ComponentId, +) -> VisitorResult { + match h[def].clone() { + Component::Composite(cdef) => this.visit_composite_definition(h, cdef.this), + Component::Primitive(pdef) => this.visit_primitive_definition(h, pdef.this), + } +} + +fn recursive_composite_definition( + this: &mut T, + h: &mut Heap, + def: CompositeId, +) -> VisitorResult { + for ¶m in h[def].parameters.clone().iter() { + recursive_parameter_as_variable(this, h, param)?; + } + this.visit_statement(h, h[def].body) +} + +fn recursive_primitive_definition( + this: &mut T, + h: &mut Heap, + def: PrimitiveId, +) -> VisitorResult { + for ¶m in h[def].parameters.clone().iter() { + recursive_parameter_as_variable(this, h, param)?; + } + this.visit_statement(h, h[def].body) +} + +fn recursive_function_definition( + this: &mut T, + h: &mut Heap, + def: FunctionId, +) -> VisitorResult { + for ¶m in h[def].parameters.clone().iter() { + recursive_parameter_as_variable(this, h, param)?; + } + this.visit_statement(h, h[def].body) +} + +fn recursive_variable_declaration( + this: &mut T, + h: &mut Heap, + decl: VariableId, +) -> VisitorResult { + match h[decl].clone() { + Variable::Parameter(decl) => this.visit_parameter_declaration(h, decl.this), + Variable::Local(decl) => this.visit_local_declaration(h, decl.this), + } +} + +fn recursive_statement(this: &mut T, h: &mut Heap, stmt: StatementId) -> VisitorResult { + match h[stmt].clone() { + Statement::Block(stmt) => this.visit_block_statement(h, stmt.this), + Statement::Local(stmt) => this.visit_local_statement(h, stmt.this()), + Statement::Skip(stmt) => this.visit_skip_statement(h, stmt.this), + Statement::Labeled(stmt) => this.visit_labeled_statement(h, stmt.this), + Statement::If(stmt) => this.visit_if_statement(h, stmt.this), + Statement::While(stmt) => this.visit_while_statement(h, stmt.this), + Statement::Break(stmt) => this.visit_break_statement(h, stmt.this), + Statement::Continue(stmt) => this.visit_continue_statement(h, stmt.this), + Statement::Synchronous(stmt) => this.visit_synchronous_statement(h, stmt.this), + Statement::Return(stmt) => this.visit_return_statement(h, stmt.this), + Statement::Assert(stmt) => this.visit_assert_statement(h, stmt.this), + Statement::Goto(stmt) => this.visit_goto_statement(h, stmt.this), + Statement::New(stmt) => this.visit_new_statement(h, stmt.this), + Statement::Put(stmt) => this.visit_put_statement(h, stmt.this), + Statement::Expression(stmt) => this.visit_expression_statement(h, stmt.this), + Statement::EndSynchronous(_) | Statement::EndWhile(_) | Statement::EndIf(_) => { + unreachable!() // pseudo-statement + } + } +} + +fn recursive_block_statement( + this: &mut T, + h: &mut Heap, + block: BlockStatementId, +) -> VisitorResult { + for &local in h[block].locals.clone().iter() { + recursive_local_as_variable(this, h, local)?; + } + for &stmt in h[block].statements.clone().iter() { + this.visit_statement(h, stmt)?; + } + Ok(()) +} + +fn recursive_local_statement( + this: &mut T, + h: &mut Heap, + stmt: LocalStatementId, +) -> VisitorResult { + match h[stmt].clone() { + LocalStatement::Channel(stmt) => this.visit_channel_statement(h, stmt.this), + LocalStatement::Memory(stmt) => this.visit_memory_statement(h, stmt.this), + } +} + +fn recursive_memory_statement( + this: &mut T, + h: &mut Heap, + stmt: MemoryStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].initial) +} + +fn recursive_labeled_statement( + this: &mut T, + h: &mut Heap, + stmt: LabeledStatementId, +) -> VisitorResult { + this.visit_statement(h, h[stmt].body) +} + +fn recursive_if_statement( + this: &mut T, + h: &mut Heap, + stmt: IfStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].test)?; + this.visit_statement(h, h[stmt].true_body)?; + this.visit_statement(h, h[stmt].false_body) +} + +fn recursive_while_statement( + this: &mut T, + h: &mut Heap, + stmt: WhileStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].test)?; + this.visit_statement(h, h[stmt].body) +} + +fn recursive_synchronous_statement( + this: &mut T, + h: &mut Heap, + stmt: SynchronousStatementId, +) -> VisitorResult { + for ¶m in h[stmt].parameters.clone().iter() { + recursive_parameter_as_variable(this, h, param)?; + } + this.visit_statement(h, h[stmt].body) +} + +fn recursive_return_statement( + this: &mut T, + h: &mut Heap, + stmt: ReturnStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].expression) +} + +fn recursive_assert_statement( + this: &mut T, + h: &mut Heap, + stmt: AssertStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].expression) +} + +fn recursive_new_statement( + this: &mut T, + h: &mut Heap, + stmt: NewStatementId, +) -> VisitorResult { + recursive_call_expression_as_expression(this, h, h[stmt].expression) +} + +fn recursive_put_statement( + this: &mut T, + h: &mut Heap, + stmt: PutStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].port)?; + this.visit_expression(h, h[stmt].message) +} + +fn recursive_expression_statement( + this: &mut T, + h: &mut Heap, + stmt: ExpressionStatementId, +) -> VisitorResult { + this.visit_expression(h, h[stmt].expression) +} + +fn recursive_expression( + this: &mut T, + h: &mut Heap, + expr: ExpressionId, +) -> VisitorResult { + match h[expr].clone() { + Expression::Assignment(expr) => this.visit_assignment_expression(h, expr.this), + Expression::Conditional(expr) => this.visit_conditional_expression(h, expr.this), + Expression::Binary(expr) => this.visit_binary_expression(h, expr.this), + Expression::Unary(expr) => this.visit_unary_expression(h, expr.this), + Expression::Indexing(expr) => this.visit_indexing_expression(h, expr.this), + Expression::Slicing(expr) => this.visit_slicing_expression(h, expr.this), + Expression::Select(expr) => this.visit_select_expression(h, expr.this), + Expression::Array(expr) => this.visit_array_expression(h, expr.this), + Expression::Constant(expr) => this.visit_constant_expression(h, expr.this), + Expression::Call(expr) => this.visit_call_expression(h, expr.this), + Expression::Variable(expr) => this.visit_variable_expression(h, expr.this), + } +} + +fn recursive_assignment_expression( + this: &mut T, + h: &mut Heap, + expr: AssignmentExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].left)?; + this.visit_expression(h, h[expr].right) +} + +fn recursive_conditional_expression( + this: &mut T, + h: &mut Heap, + expr: ConditionalExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].test)?; + this.visit_expression(h, h[expr].true_expression)?; + this.visit_expression(h, h[expr].false_expression) +} + +fn recursive_binary_expression( + this: &mut T, + h: &mut Heap, + expr: BinaryExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].left)?; + this.visit_expression(h, h[expr].right) +} + +fn recursive_unary_expression( + this: &mut T, + h: &mut Heap, + expr: UnaryExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].expression) +} + +fn recursive_indexing_expression( + this: &mut T, + h: &mut Heap, + expr: IndexingExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].subject)?; + this.visit_expression(h, h[expr].index) +} + +fn recursive_slicing_expression( + this: &mut T, + h: &mut Heap, + expr: SlicingExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].subject)?; + this.visit_expression(h, h[expr].from_index)?; + this.visit_expression(h, h[expr].to_index) +} + +fn recursive_select_expression( + this: &mut T, + h: &mut Heap, + expr: SelectExpressionId, +) -> VisitorResult { + this.visit_expression(h, h[expr].subject) +} + +fn recursive_array_expression( + this: &mut T, + h: &mut Heap, + expr: ArrayExpressionId, +) -> VisitorResult { + for &expr in h[expr].elements.clone().iter() { + this.visit_expression(h, expr)?; + } + Ok(()) +} + +fn recursive_call_expression( + this: &mut T, + h: &mut Heap, + expr: CallExpressionId, +) -> VisitorResult { + for &expr in h[expr].arguments.clone().iter() { + this.visit_expression(h, expr)?; + } + Ok(()) +} + +// ==================== +// Grammar Rules +// ==================== + +pub(crate) struct NestedSynchronousStatements { + illegal: bool, +} + +impl NestedSynchronousStatements { + pub(crate) fn new() -> Self { + NestedSynchronousStatements { illegal: false } + } +} + +impl Visitor for NestedSynchronousStatements { + fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + assert!(!self.illegal); + self.illegal = true; + recursive_composite_definition(self, h, def)?; + self.illegal = false; + Ok(()) + } + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + assert!(!self.illegal); + self.illegal = true; + recursive_function_definition(self, h, def)?; + self.illegal = false; + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + if self.illegal { + return Err(( + h[stmt].position(), + "Illegal nested synchronous statement".to_string(), + )); + } + self.illegal = true; + recursive_synchronous_statement(self, h, stmt)?; + self.illegal = false; + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct ChannelStatementOccurrences { + illegal: bool, +} + +impl ChannelStatementOccurrences { + pub(crate) fn new() -> Self { + ChannelStatementOccurrences { illegal: false } + } +} + +impl Visitor for ChannelStatementOccurrences { + fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + assert!(!self.illegal); + self.illegal = true; + recursive_primitive_definition(self, h, def)?; + self.illegal = false; + Ok(()) + } + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + assert!(!self.illegal); + self.illegal = true; + recursive_function_definition(self, h, def)?; + self.illegal = false; + Ok(()) + } + fn visit_channel_statement(&mut self, h: &mut Heap, stmt: ChannelStatementId) -> VisitorResult { + if self.illegal { + return Err((h[stmt].position(), "Illegal channel declaration".to_string())); + } + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct FunctionStatementReturns {} + +impl FunctionStatementReturns { + pub(crate) fn new() -> Self { + FunctionStatementReturns {} + } + fn function_error(&self, position: InputPosition) -> VisitorResult { + Err((position, "Function definition must return".to_string())) + } +} + +impl Visitor for FunctionStatementReturns { + fn visit_component_definition(&mut self, _h: &mut Heap, _def: ComponentId) -> VisitorResult { + Ok(()) + } + fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { + Ok(()) + } + fn visit_block_statement(&mut self, h: &mut Heap, block: BlockStatementId) -> VisitorResult { + let len = h[block].statements.len(); + assert!(len > 0); + self.visit_statement(h, h[block].statements[len - 1]) + } + fn visit_skip_statement(&mut self, h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_continue_statement( + &mut self, + h: &mut Heap, + stmt: ContinueStatementId, + ) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_expression_statement( + &mut self, + h: &mut Heap, + stmt: ExpressionStatementId, + ) -> VisitorResult { + self.function_error(h[stmt].position) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct ComponentStatementReturnNew { + illegal_new: bool, + illegal_return: bool, +} + +impl ComponentStatementReturnNew { + pub(crate) fn new() -> Self { + ComponentStatementReturnNew { illegal_new: false, illegal_return: false } + } +} + +impl Visitor for ComponentStatementReturnNew { + fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { + assert!(!(self.illegal_new || self.illegal_return)); + self.illegal_return = true; + recursive_component_definition(self, h, def)?; + self.illegal_return = false; + Ok(()) + } + fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { + assert!(!self.illegal_new); + self.illegal_new = true; + recursive_primitive_definition(self, h, def)?; + self.illegal_new = false; + Ok(()) + } + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + assert!(!(self.illegal_new || self.illegal_return)); + self.illegal_new = true; + recursive_function_definition(self, h, def)?; + self.illegal_new = false; + Ok(()) + } + fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { + Ok(()) + } + fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { + if self.illegal_return { + Err((h[stmt].position, "Component definition must not return".to_string())) + } else { + recursive_return_statement(self, h, stmt) + } + } + fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + if self.illegal_new { + Err(( + h[stmt].position, + "Symbol definition contains illegal new statement".to_string(), + )) + } else { + recursive_new_statement(self, h, stmt) + } + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct CheckBuiltinOccurrences { + legal: bool, +} + +impl CheckBuiltinOccurrences { + pub(crate) fn new() -> Self { + CheckBuiltinOccurrences { legal: false } + } +} + +impl Visitor for CheckBuiltinOccurrences { + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + assert!(!self.legal); + self.legal = true; + recursive_synchronous_statement(self, h, stmt)?; + self.legal = false; + Ok(()) + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + match h[expr].method { + Method::Get | Method::Fires => { + if !self.legal { + return Err((h[expr].position, "Illegal built-in occurrence".to_string())); + } + } + _ => {} + } + recursive_call_expression(self, h, expr) + } +} + +pub(crate) struct BuildSymbolDeclarations { + declarations: Vec, +} + +impl BuildSymbolDeclarations { + pub(crate) fn new() -> Self { + BuildSymbolDeclarations { declarations: Vec::new() } + } + fn checked_add(&mut self, h: &mut Heap, decl: DeclarationId) -> VisitorResult { + for &old in self.declarations.iter() { + let id = h[decl].identifier(); + if id.value == h[old].identifier().value { + return match h[decl].clone() { + Declaration::Defined(defined) => Err(( + h[defined.definition].position(), + format!("Defined symbol clash: {}", String::from_utf8_lossy(&id.value)), + )), + Declaration::Imported(imported) => Err(( + h[imported.import].position(), + format!("Imported symbol clash: {}", String::from_utf8_lossy(&id.value)), + )), + }; + } + } + self.declarations.push(decl); + Ok(()) + } +} + +impl Visitor for BuildSymbolDeclarations { + fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { + recursive_protocol_description(self, h, pd)?; + // Move all collected declarations to the protocol description + h[pd].declarations.append(&mut self.declarations); + Ok(()) + } + fn visit_import(&mut self, h: &mut Heap, import: ImportId) -> VisitorResult { + // println!("DEBUG: Warning (at {}:{}), import actually not yet implemented", file!(), line!()); + // TODO: Implement + let vec = library::get_declarations(h, import); + if let Err(_err)= vec { + return Err((h[import].position(), "Failed to perform import".to_string())) + } + + // Destructively iterate over the vector + for decl in vec.unwrap() { + self.checked_add(h, decl)?; + } + Ok(()) + } + fn visit_symbol_definition(&mut self, h: &mut Heap, definition: DefinitionId) -> VisitorResult { + let signature = Signature::from_definition(h, definition); + let decl = h + .alloc_defined_declaration(|this| DefinedDeclaration { this, definition, signature }) + .upcast(); + self.checked_add(h, decl)?; + Ok(()) + } +} + +pub(crate) struct LinkCallExpressions { + pd: Option, + composite: bool, + new_statement: bool, +} + +impl LinkCallExpressions { + pub(crate) fn new() -> Self { + LinkCallExpressions { pd: None, composite: false, new_statement: false } + } + fn get_declaration( + &self, + h: &Heap, + id: &Identifier, + ) -> Result { + match h[self.pd.unwrap()].get_declaration(h, &id) { + Some(id) => Ok(id), + None => Err((id.position, "Unresolved method".to_string())), + } + } +} + +impl Visitor for LinkCallExpressions { + fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { + self.pd = Some(pd); + recursive_protocol_description(self, h, pd)?; + self.pd = None; + Ok(()) + } + fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { + assert!(!self.composite); + self.composite = true; + recursive_composite_definition(self, h, def)?; + self.composite = false; + Ok(()) + } + fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + assert!(self.composite); + assert!(!self.new_statement); + self.new_statement = true; + recursive_new_statement(self, h, stmt)?; + self.new_statement = false; + Ok(()) + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + if let Method::Symbolic(id) = &h[expr].method { + // TODO: @symbol_table + let decl = self.get_declaration(h, id)?; + if self.new_statement && h[decl].is_function() { + return Err((id.position, "Illegal call expression".to_string())); + } + if !self.new_statement && h[decl].is_component() { + return Err((id.position, "Illegal call expression".to_string())); + } + // Set the corresponding declaration of the call + h[expr].declaration = Some(decl); + } + // A new statement's call expression may have as arguments function calls + let old = self.new_statement; + self.new_statement = false; + recursive_call_expression(self, h, expr)?; + self.new_statement = old; + Ok(()) + } +} + +pub(crate) struct BuildScope { + scope: Option, +} + +impl BuildScope { + pub(crate) fn new() -> Self { + BuildScope { scope: None } + } +} + +impl Visitor for BuildScope { + fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { + assert!(self.scope.is_none()); + self.scope = Some(Scope::Definition(def)); + recursive_symbol_definition(self, h, def)?; + self.scope = None; + Ok(()) + } + fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { + assert!(!self.scope.is_none()); + let old = self.scope; + // First store the current scope + h[stmt].parent_scope = self.scope; + // Then move scope down to current block + self.scope = Some(Scope::Block(stmt)); + recursive_block_statement(self, h, stmt)?; + // Move scope back up + self.scope = old; + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + assert!(!self.scope.is_none()); + let old = self.scope; + // First store the current scope + h[stmt].parent_scope = self.scope; + // Then move scope down to current sync + self.scope = Some(Scope::Synchronous(stmt)); + recursive_synchronous_statement(self, h, stmt)?; + // Move scope back up + self.scope = old; + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct ResolveVariables { + scope: Option, +} + +impl ResolveVariables { + pub(crate) fn new() -> Self { + ResolveVariables { scope: None } + } + fn get_variable(&self, h: &Heap, id: &Identifier) -> Result { + if let Some(var) = self.find_variable(h, id) { + Ok(var) + } else { + Err((id.position, "Unresolved variable".to_string())) + } + } + fn find_variable(&self, h: &Heap, id: &Identifier) -> Option { + ResolveVariables::find_variable_impl(h, self.scope, id) + } + fn find_variable_impl( + h: &Heap, + scope: Option, + id: &Identifier, + ) -> Option { + if let Some(scope) = scope { + // The order in which we check for variables is important: + // otherwise, two variables with the same name are shadowed. + if let Some(var) = ResolveVariables::find_variable_impl(h, scope.parent_scope(h), id) { + Some(var) + } else { + scope.get_variable(h, id) + } + } else { + None + } + } +} + +impl Visitor for ResolveVariables { + fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { + assert!(self.scope.is_none()); + self.scope = Some(Scope::Definition(def)); + recursive_symbol_definition(self, h, def)?; + self.scope = None; + Ok(()) + } + fn visit_variable_declaration(&mut self, h: &mut Heap, decl: VariableId) -> VisitorResult { + // This is only called for parameters of definitions and synchronous statements, + // since the local variables of block statements are still empty + // the moment it is traversed. After resolving variables, this + // function is also called for every local variable declaration. + + // We want to make sure that the resolved variable is the variable declared itself; + // otherwise, there is some variable defined in the parent scope. This check + // imposes that the order in which find_variable looks is significant! + let id = h[decl].identifier(); + let check_same = self.find_variable(h, id); + if let Some(check_same) = check_same { + if check_same != decl { + return Err((id.position, "Declared variable clash".to_string())); + } + } + recursive_variable_declaration(self, h, decl) + } + fn visit_memory_statement(&mut self, h: &mut Heap, stmt: MemoryStatementId) -> VisitorResult { + assert!(!self.scope.is_none()); + let var = h[stmt].variable; + let id = &h[var].identifier; + // First check whether variable with same identifier is in scope + let check_duplicate = self.find_variable(h, id); + if check_duplicate.is_some() { + return Err((id.position, "Declared variable clash".to_string())); + } + // Then check the expression's variables (this should not refer to own variable) + recursive_memory_statement(self, h, stmt)?; + // Finally, we may add the variable to the scope, which is guaranteed to be a block + { + let block = &mut h[self.scope.unwrap().to_block()]; + block.locals.push(var); + } + Ok(()) + } + fn visit_channel_statement(&mut self, h: &mut Heap, stmt: ChannelStatementId) -> VisitorResult { + assert!(!self.scope.is_none()); + // First handle the from variable + { + let var = h[stmt].from; + let id = &h[var].identifier; + let check_duplicate = self.find_variable(h, id); + if check_duplicate.is_some() { + return Err((id.position, "Declared variable clash".to_string())); + } + let block = &mut h[self.scope.unwrap().to_block()]; + block.locals.push(var); + } + // Then handle the to variable (which may not be the same as the from) + { + let var = h[stmt].to; + let id = &h[var].identifier; + let check_duplicate = self.find_variable(h, id); + if check_duplicate.is_some() { + return Err((id.position, "Declared variable clash".to_string())); + } + let block = &mut h[self.scope.unwrap().to_block()]; + block.locals.push(var); + } + Ok(()) + } + fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { + assert!(!self.scope.is_none()); + let old = self.scope; + self.scope = Some(Scope::Block(stmt)); + recursive_block_statement(self, h, stmt)?; + self.scope = old; + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + assert!(!self.scope.is_none()); + let old = self.scope; + self.scope = Some(Scope::Synchronous(stmt)); + recursive_synchronous_statement(self, h, stmt)?; + self.scope = old; + Ok(()) + } + fn visit_variable_expression( + &mut self, + h: &mut Heap, + expr: VariableExpressionId, + ) -> VisitorResult { + let var = self.get_variable(h, &h[expr].identifier)?; + h[expr].declaration = Some(var); + Ok(()) + } +} + +pub(crate) struct UniqueStatementId(StatementId); + +pub(crate) struct LinkStatements { + prev: Option, +} + +impl LinkStatements { + pub(crate) fn new() -> Self { + LinkStatements { prev: None } + } +} + +impl Visitor for LinkStatements { + fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { + assert!(self.prev.is_none()); + recursive_symbol_definition(self, h, def)?; + // Clear out last statement + self.prev = None; + Ok(()) + } + fn visit_statement(&mut self, h: &mut Heap, stmt: StatementId) -> VisitorResult { + if let Some(UniqueStatementId(prev)) = self.prev.take() { + h[prev].link_next(stmt); + } + recursive_statement(self, h, stmt) + } + fn visit_local_statement(&mut self, _h: &mut Heap, stmt: LocalStatementId) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { + recursive_labeled_statement(self, h, stmt) + } + fn visit_skip_statement(&mut self, _h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { + // We allocate a pseudo-statement, which combines both branches into one next statement + let position = h[stmt].position; + let pseudo = + h.alloc_end_if_statement(|this| EndIfStatement { this, position, next: None }).upcast(); + assert!(self.prev.is_none()); + self.visit_statement(h, h[stmt].true_body)?; + if let Some(UniqueStatementId(prev)) = self.prev.take() { + h[prev].link_next(pseudo); + } + assert!(self.prev.is_none()); + self.visit_statement(h, h[stmt].false_body)?; + if let Some(UniqueStatementId(prev)) = self.prev.take() { + h[prev].link_next(pseudo); + } + // Use the pseudo-statement as the statement where to update the next pointer + self.prev = Some(UniqueStatementId(pseudo)); + Ok(()) + } + fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { + // We allocate a pseudo-statement, to which the break statement finds its target + let position = h[stmt].position; + let pseudo = + h.alloc_end_while_statement(|this| EndWhileStatement { this, position, next: None }); + // Update the while's next statement to point to the pseudo-statement + h[stmt].next = Some(pseudo); + assert!(self.prev.is_none()); + self.visit_statement(h, h[stmt].body)?; + // The body's next statement loops back to the while statement itself + // Note: continue statements also loop back to the while statement itself + if let Some(UniqueStatementId(prev)) = std::mem::replace(&mut self.prev, None) { + h[prev].link_next(stmt.upcast()); + } + // Use the while statement as the statement where the next pointer is updated + self.prev = Some(UniqueStatementId(pseudo.upcast())); + Ok(()) + } + fn visit_break_statement(&mut self, _h: &mut Heap, _stmt: BreakStatementId) -> VisitorResult { + Ok(()) + } + fn visit_continue_statement( + &mut self, + _h: &mut Heap, + _stmt: ContinueStatementId, + ) -> VisitorResult { + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + // Allocate a pseudo-statement, that is added for helping the evaluator to issue a command + // that marks the end of the synchronous block. Every evaluation has to pause at this + // point, only to resume later when the thread is selected as unique thread to continue. + let position = h[stmt].position; + let pseudo = h + .alloc_end_synchronous_statement(|this| EndSynchronousStatement { + this, + position, + next: None, + }) + .upcast(); + assert!(self.prev.is_none()); + self.visit_statement(h, h[stmt].body)?; + // The body's next statement points to the pseudo element + if let Some(UniqueStatementId(prev)) = std::mem::replace(&mut self.prev, None) { + h[prev].link_next(pseudo); + } + // Use the pseudo-statement as the statement where the next pointer is updated + self.prev = Some(UniqueStatementId(pseudo)); + Ok(()) + } + fn visit_return_statement(&mut self, _h: &mut Heap, _stmt: ReturnStatementId) -> VisitorResult { + Ok(()) + } + fn visit_assert_statement(&mut self, _h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { + Ok(()) + } + fn visit_new_statement(&mut self, _h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_put_statement(&mut self, _h: &mut Heap, stmt: PutStatementId) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_expression_statement( + &mut self, + _h: &mut Heap, + stmt: ExpressionStatementId, + ) -> VisitorResult { + self.prev = Some(UniqueStatementId(stmt.upcast())); + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct BuildLabels { + block: Option, + sync_enclosure: Option, +} + +impl BuildLabels { + pub(crate) fn new() -> Self { + BuildLabels { block: None, sync_enclosure: None } + } +} + +impl Visitor for BuildLabels { + fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { + assert_eq!(self.block, h[stmt].parent_block(h)); + let old = self.block; + self.block = Some(stmt); + recursive_block_statement(self, h, stmt)?; + self.block = old; + Ok(()) + } + fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { + assert!(!self.block.is_none()); + // Store label in current block (on the fly) + h[self.block.unwrap()].labels.push(stmt); + // Update synchronous scope of label + h[stmt].in_sync = self.sync_enclosure; + recursive_labeled_statement(self, h, stmt) + } + fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { + h[stmt].in_sync = self.sync_enclosure; + recursive_while_statement(self, h, stmt) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + assert!(self.sync_enclosure.is_none()); + self.sync_enclosure = Some(stmt); + recursive_synchronous_statement(self, h, stmt)?; + self.sync_enclosure = None; + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct ResolveLabels { + block: Option, + while_enclosure: Option, + sync_enclosure: Option, +} + +impl ResolveLabels { + pub(crate) fn new() -> Self { + ResolveLabels { block: None, while_enclosure: None, sync_enclosure: None } + } + fn check_duplicate_impl( + h: &Heap, + block: Option, + stmt: LabeledStatementId, + ) -> VisitorResult { + if let Some(block) = block { + // Checking the parent first is important. Otherwise, labels + // overshadow previously defined labels: and this is illegal! + ResolveLabels::check_duplicate_impl(h, h[block].parent_block(h), stmt)?; + // For the current block, check for a duplicate. + for &other_stmt in h[block].labels.iter() { + if other_stmt == stmt { + continue; + } else { + if h[other_stmt].label.value == h[stmt].label.value { + return Err((h[stmt].position, "Duplicate label".to_string())); + } + } + } + } + Ok(()) + } + fn check_duplicate(&self, h: &Heap, stmt: LabeledStatementId) -> VisitorResult { + ResolveLabels::check_duplicate_impl(h, self.block, stmt) + } + fn get_target( + &self, + h: &Heap, + id: &Identifier, + ) -> Result { + if let Some(stmt) = ResolveLabels::find_target(h, self.block, id) { + Ok(stmt) + } else { + Err((id.position, "Unresolved label".to_string())) + } + } + fn find_target( + h: &Heap, + block: Option, + id: &Identifier, + ) -> Option { + if let Some(block) = block { + // It does not matter in what order we find the labels. + // If there are duplicates: that is checked elsewhere. + for &stmt in h[block].labels.iter() { + if h[stmt].label.value == id.value { + return Some(stmt); + } + } + if let Some(stmt) = ResolveLabels::find_target(h, h[block].parent_block(h), id) { + return Some(stmt); + } + } + None + } +} + +impl Visitor for ResolveLabels { + fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { + assert_eq!(self.block, h[stmt].parent_block(h)); + let old = self.block; + self.block = Some(stmt); + recursive_block_statement(self, h, stmt)?; + self.block = old; + Ok(()) + } + fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { + assert!(!self.block.is_none()); + self.check_duplicate(h, stmt)?; + recursive_labeled_statement(self, h, stmt) + } + fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { + let old = self.while_enclosure; + self.while_enclosure = Some(stmt); + recursive_while_statement(self, h, stmt)?; + self.while_enclosure = old; + Ok(()) + } + fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { + let the_while; + if let Some(label) = &h[stmt].label { + let target = self.get_target(h, label)?; + let target = &h[h[target].body]; + if !target.is_while() { + return Err(( + h[stmt].position, + "Illegal break: target not a while statement".to_string(), + )); + } + the_while = target.as_while(); + // TODO: check if break is nested under while + } else { + if self.while_enclosure.is_none() { + return Err(( + h[stmt].position, + "Illegal break: no surrounding while statement".to_string(), + )); + } + the_while = &h[self.while_enclosure.unwrap()]; + // break is always nested under while, by recursive vistor + } + if the_while.in_sync != self.sync_enclosure { + return Err(( + h[stmt].position, + "Illegal break: synchronous statement escape".to_string(), + )); + } + h[stmt].target = the_while.next; + Ok(()) + } + fn visit_continue_statement( + &mut self, + h: &mut Heap, + stmt: ContinueStatementId, + ) -> VisitorResult { + let the_while; + if let Some(label) = &h[stmt].label { + let target = self.get_target(h, label)?; + let target = &h[h[target].body]; + if !target.is_while() { + return Err(( + h[stmt].position, + "Illegal continue: target not a while statement".to_string(), + )); + } + the_while = target.as_while(); + // TODO: check if continue is nested under while + } else { + if self.while_enclosure.is_none() { + return Err(( + h[stmt].position, + "Illegal continue: no surrounding while statement".to_string(), + )); + } + the_while = &h[self.while_enclosure.unwrap()]; + // continue is always nested under while, by recursive vistor + } + if the_while.in_sync != self.sync_enclosure { + return Err(( + h[stmt].position, + "Illegal continue: synchronous statement escape".to_string(), + )); + } + h[stmt].target = Some(the_while.this); + Ok(()) + } + fn visit_synchronous_statement( + &mut self, + h: &mut Heap, + stmt: SynchronousStatementId, + ) -> VisitorResult { + assert!(self.sync_enclosure.is_none()); + self.sync_enclosure = Some(stmt); + recursive_synchronous_statement(self, h, stmt)?; + self.sync_enclosure = None; + Ok(()) + } + fn visit_goto_statement(&mut self, h: &mut Heap, stmt: GotoStatementId) -> VisitorResult { + let target = self.get_target(h, &h[stmt].label)?; + if h[target].in_sync != self.sync_enclosure { + return Err(( + h[stmt].position, + "Illegal goto: synchronous statement escape".to_string(), + )); + } + h[stmt].target = Some(target); + Ok(()) + } + fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct AssignableExpressions { + assignable: bool, +} + +impl AssignableExpressions { + pub(crate) fn new() -> Self { + AssignableExpressions { assignable: false } + } + fn error(&self, position: InputPosition) -> VisitorResult { + Err((position, "Unassignable expression".to_string())) + } +} + +impl Visitor for AssignableExpressions { + fn visit_assignment_expression( + &mut self, + h: &mut Heap, + expr: AssignmentExpressionId, + ) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + self.assignable = true; + self.visit_expression(h, h[expr].left)?; + self.assignable = false; + self.visit_expression(h, h[expr].right) + } + } + fn visit_conditional_expression( + &mut self, + h: &mut Heap, + expr: ConditionalExpressionId, + ) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + recursive_conditional_expression(self, h, expr) + } + } + fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + recursive_binary_expression(self, h, expr) + } + } + fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + match h[expr].operation { + UnaryOperation::PostDecrement + | UnaryOperation::PreDecrement + | UnaryOperation::PostIncrement + | UnaryOperation::PreIncrement => { + self.assignable = true; + recursive_unary_expression(self, h, expr)?; + self.assignable = false; + Ok(()) + } + _ => recursive_unary_expression(self, h, expr), + } + } + } + fn visit_indexing_expression( + &mut self, + h: &mut Heap, + expr: IndexingExpressionId, + ) -> VisitorResult { + let old = self.assignable; + self.assignable = false; + recursive_indexing_expression(self, h, expr)?; + self.assignable = old; + Ok(()) + } + fn visit_slicing_expression( + &mut self, + h: &mut Heap, + expr: SlicingExpressionId, + ) -> VisitorResult { + let old = self.assignable; + self.assignable = false; + recursive_slicing_expression(self, h, expr)?; + self.assignable = old; + Ok(()) + } + fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { + if h[expr].field.is_length() && self.assignable { + return self.error(h[expr].position); + } + let old = self.assignable; + self.assignable = false; + recursive_select_expression(self, h, expr)?; + self.assignable = old; + Ok(()) + } + fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + recursive_array_expression(self, h, expr) + } + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + recursive_call_expression(self, h, expr) + } + } + fn visit_constant_expression( + &mut self, + h: &mut Heap, + expr: ConstantExpressionId, + ) -> VisitorResult { + if self.assignable { + self.error(h[expr].position) + } else { + Ok(()) + } + } + fn visit_variable_expression( + &mut self, + _h: &mut Heap, + _expr: VariableExpressionId, + ) -> VisitorResult { + Ok(()) + } +} + +pub(crate) struct IndexableExpressions { + indexable: bool, +} + +impl IndexableExpressions { + pub(crate) fn new() -> Self { + IndexableExpressions { indexable: false } + } + fn error(&self, position: InputPosition) -> VisitorResult { + Err((position, "Unindexable expression".to_string())) + } +} + +impl Visitor for IndexableExpressions { + fn visit_assignment_expression( + &mut self, + h: &mut Heap, + expr: AssignmentExpressionId, + ) -> VisitorResult { + if self.indexable { + self.error(h[expr].position) + } else { + recursive_assignment_expression(self, h, expr) + } + } + fn visit_conditional_expression( + &mut self, + h: &mut Heap, + expr: ConditionalExpressionId, + ) -> VisitorResult { + let old = self.indexable; + self.indexable = false; + self.visit_expression(h, h[expr].test)?; + self.indexable = old; + self.visit_expression(h, h[expr].true_expression)?; + self.visit_expression(h, h[expr].false_expression) + } + fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { + if self.indexable && h[expr].operation != BinaryOperator::Concatenate { + self.error(h[expr].position) + } else { + recursive_binary_expression(self, h, expr) + } + } + fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { + if self.indexable { + self.error(h[expr].position) + } else { + recursive_unary_expression(self, h, expr) + } + } + fn visit_indexing_expression( + &mut self, + h: &mut Heap, + expr: IndexingExpressionId, + ) -> VisitorResult { + let old = self.indexable; + self.indexable = true; + self.visit_expression(h, h[expr].subject)?; + self.indexable = false; + self.visit_expression(h, h[expr].index)?; + self.indexable = old; + Ok(()) + } + fn visit_slicing_expression( + &mut self, + h: &mut Heap, + expr: SlicingExpressionId, + ) -> VisitorResult { + let old = self.indexable; + self.indexable = true; + self.visit_expression(h, h[expr].subject)?; + self.indexable = false; + self.visit_expression(h, h[expr].from_index)?; + self.visit_expression(h, h[expr].to_index)?; + self.indexable = old; + Ok(()) + } + fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { + let old = self.indexable; + self.indexable = false; + recursive_select_expression(self, h, expr)?; + self.indexable = old; + Ok(()) + } + fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { + let old = self.indexable; + self.indexable = false; + recursive_array_expression(self, h, expr)?; + self.indexable = old; + Ok(()) + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + let old = self.indexable; + self.indexable = false; + recursive_call_expression(self, h, expr)?; + self.indexable = old; + Ok(()) + } + fn visit_constant_expression( + &mut self, + h: &mut Heap, + expr: ConstantExpressionId, + ) -> VisitorResult { + if self.indexable { + self.error(h[expr].position) + } else { + Ok(()) + } + } +} + +pub(crate) struct SelectableExpressions { + selectable: bool, +} + +impl SelectableExpressions { + pub(crate) fn new() -> Self { + SelectableExpressions { selectable: false } + } + fn error(&self, position: InputPosition) -> VisitorResult { + Err((position, "Unselectable expression".to_string())) + } +} + +impl Visitor for SelectableExpressions { + fn visit_assignment_expression( + &mut self, + h: &mut Heap, + expr: AssignmentExpressionId, + ) -> VisitorResult { + // left-hand side of assignment can be skipped + let old = self.selectable; + self.selectable = false; + self.visit_expression(h, h[expr].right)?; + self.selectable = old; + Ok(()) + } + fn visit_conditional_expression( + &mut self, + h: &mut Heap, + expr: ConditionalExpressionId, + ) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + self.visit_expression(h, h[expr].test)?; + self.selectable = old; + self.visit_expression(h, h[expr].true_expression)?; + self.visit_expression(h, h[expr].false_expression) + } + fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { + if self.selectable && h[expr].operation != BinaryOperator::Concatenate { + self.error(h[expr].position) + } else { + recursive_binary_expression(self, h, expr) + } + } + fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { + if self.selectable { + self.error(h[expr].position) + } else { + recursive_unary_expression(self, h, expr) + } + } + fn visit_indexing_expression( + &mut self, + h: &mut Heap, + expr: IndexingExpressionId, + ) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + recursive_indexing_expression(self, h, expr)?; + self.selectable = old; + Ok(()) + } + fn visit_slicing_expression( + &mut self, + h: &mut Heap, + expr: SlicingExpressionId, + ) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + recursive_slicing_expression(self, h, expr)?; + self.selectable = old; + Ok(()) + } + fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + recursive_select_expression(self, h, expr)?; + self.selectable = old; + Ok(()) + } + fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + recursive_array_expression(self, h, expr)?; + self.selectable = old; + Ok(()) + } + fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { + let old = self.selectable; + self.selectable = false; + recursive_call_expression(self, h, expr)?; + self.selectable = old; + Ok(()) + } + fn visit_constant_expression( + &mut self, + h: &mut Heap, + expr: ConstantExpressionId, + ) -> VisitorResult { + if self.selectable { + self.error(h[expr].position) + } else { + Ok(()) + } + } +} diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 4fbb63b1ce5e21bd3d0374b153d33d04ea3accdb..56c55fd94a737851d21f329f74ce5a0d88abc887 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,1807 +1,203 @@ +mod depth_visitor; +mod symbol_table; +mod type_table; + +use depth_visitor::*; +use symbol_table::SymbolTable; + use crate::protocol::ast::*; use crate::protocol::inputsource::*; use crate::protocol::lexer::*; -use crate::protocol::library; - -// The following indirection is needed due to a bug in the cbindgen tool. -type Unit = (); -type VisitorResult = Result; - -trait Visitor: Sized { - fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { - recursive_protocol_description(self, h, pd) - } - fn visit_pragma(&mut self, _h: &mut Heap, _pragma: PragmaId) -> VisitorResult { - Ok(()) - } - fn visit_import(&mut self, _h: &mut Heap, _import: ImportId) -> VisitorResult { - Ok(()) - } - - fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { - recursive_symbol_definition(self, h, def) - } - fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { - recursive_component_definition(self, h, def) - } - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { - recursive_composite_definition(self, h, def) - } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { - recursive_primitive_definition(self, h, def) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - recursive_function_definition(self, h, def) - } - - fn visit_variable_declaration(&mut self, h: &mut Heap, decl: VariableId) -> VisitorResult { - recursive_variable_declaration(self, h, decl) - } - fn visit_parameter_declaration(&mut self, _h: &mut Heap, _decl: ParameterId) -> VisitorResult { - Ok(()) - } - fn visit_local_declaration(&mut self, _h: &mut Heap, _decl: LocalId) -> VisitorResult { - Ok(()) - } - - fn visit_statement(&mut self, h: &mut Heap, stmt: StatementId) -> VisitorResult { - recursive_statement(self, h, stmt) - } - fn visit_local_statement(&mut self, h: &mut Heap, stmt: LocalStatementId) -> VisitorResult { - recursive_local_statement(self, h, stmt) - } - fn visit_memory_statement(&mut self, h: &mut Heap, stmt: MemoryStatementId) -> VisitorResult { - recursive_memory_statement(self, h, stmt) - } - fn visit_channel_statement( - &mut self, - _h: &mut Heap, - _stmt: ChannelStatementId, - ) -> VisitorResult { - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - recursive_block_statement(self, h, stmt) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - recursive_labeled_statement(self, h, stmt) - } - fn visit_skip_statement(&mut self, _h: &mut Heap, _stmt: SkipStatementId) -> VisitorResult { - Ok(()) - } - fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { - recursive_if_statement(self, h, stmt) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - recursive_while_statement(self, h, stmt) - } - fn visit_break_statement(&mut self, _h: &mut Heap, _stmt: BreakStatementId) -> VisitorResult { - Ok(()) - } - fn visit_continue_statement( - &mut self, - _h: &mut Heap, - _stmt: ContinueStatementId, - ) -> VisitorResult { - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - recursive_synchronous_statement(self, h, stmt) - } - fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { - recursive_return_statement(self, h, stmt) - } - fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - recursive_assert_statement(self, h, stmt) - } - fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { - Ok(()) - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - recursive_new_statement(self, h, stmt) - } - fn visit_put_statement(&mut self, h: &mut Heap, stmt: PutStatementId) -> VisitorResult { - recursive_put_statement(self, h, stmt) - } - fn visit_expression_statement( - &mut self, - h: &mut Heap, - stmt: ExpressionStatementId, - ) -> VisitorResult { - recursive_expression_statement(self, h, stmt) - } - - fn visit_expression(&mut self, h: &mut Heap, expr: ExpressionId) -> VisitorResult { - recursive_expression(self, h, expr) - } - fn visit_assignment_expression( - &mut self, - h: &mut Heap, - expr: AssignmentExpressionId, - ) -> VisitorResult { - recursive_assignment_expression(self, h, expr) - } - fn visit_conditional_expression( - &mut self, - h: &mut Heap, - expr: ConditionalExpressionId, - ) -> VisitorResult { - recursive_conditional_expression(self, h, expr) - } - fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { - recursive_binary_expression(self, h, expr) - } - fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { - recursive_unary_expression(self, h, expr) - } - fn visit_indexing_expression( - &mut self, - h: &mut Heap, - expr: IndexingExpressionId, - ) -> VisitorResult { - recursive_indexing_expression(self, h, expr) - } - fn visit_slicing_expression( - &mut self, - h: &mut Heap, - expr: SlicingExpressionId, - ) -> VisitorResult { - recursive_slicing_expression(self, h, expr) - } - fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { - recursive_select_expression(self, h, expr) - } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - recursive_array_expression(self, h, expr) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - recursive_call_expression(self, h, expr) - } - fn visit_constant_expression( - &mut self, - _h: &mut Heap, - _expr: ConstantExpressionId, - ) -> VisitorResult { - Ok(()) - } - fn visit_variable_expression( - &mut self, - _h: &mut Heap, - _expr: VariableExpressionId, - ) -> VisitorResult { - Ok(()) - } -} - -// Bubble-up helpers -fn recursive_parameter_as_variable( - this: &mut T, - h: &mut Heap, - param: ParameterId, -) -> VisitorResult { - this.visit_variable_declaration(h, param.upcast()) -} - -fn recursive_local_as_variable( - this: &mut T, - h: &mut Heap, - local: LocalId, -) -> VisitorResult { - this.visit_variable_declaration(h, local.upcast()) -} - -fn recursive_call_expression_as_expression( - this: &mut T, - h: &mut Heap, - call: CallExpressionId, -) -> VisitorResult { - this.visit_expression(h, call.upcast()) -} - -// Recursive procedures -fn recursive_protocol_description( - this: &mut T, - h: &mut Heap, - pd: RootId, -) -> VisitorResult { - for &pragma in h[pd].pragmas.clone().iter() { - this.visit_pragma(h, pragma)?; - } - for &import in h[pd].imports.clone().iter() { - this.visit_import(h, import)?; - } - for &def in h[pd].definitions.clone().iter() { - this.visit_symbol_definition(h, def)?; - } - Ok(()) -} -fn recursive_symbol_definition( - this: &mut T, - h: &mut Heap, - def: DefinitionId, -) -> VisitorResult { - // We clone the definition in case it is modified - match h[def].clone() { - Definition::Component(cdef) => this.visit_component_definition(h, cdef.this()), - Definition::Function(fdef) => this.visit_function_definition(h, fdef.this), - } -} +use std::collections::HashMap; -fn recursive_component_definition( - this: &mut T, - h: &mut Heap, - def: ComponentId, -) -> VisitorResult { - match h[def].clone() { - Component::Composite(cdef) => this.visit_composite_definition(h, cdef.this), - Component::Primitive(pdef) => this.visit_primitive_definition(h, pdef.this), - } +// TODO: @fixme, pub qualifier +pub(crate) struct LexedModule { + pub(crate) source: InputSource, + module_name: Vec, + version: Option, + root_id: RootId, } -fn recursive_composite_definition( - this: &mut T, - h: &mut Heap, - def: CompositeId, -) -> VisitorResult { - for ¶m in h[def].parameters.clone().iter() { - recursive_parameter_as_variable(this, h, param)?; - } - this.visit_statement(h, h[def].body) +pub struct Parser { + pub(crate) heap: Heap, + pub(crate) modules: Vec, + pub(crate) module_lookup: HashMap, usize>, // from (optional) module name to `modules` idx } -fn recursive_primitive_definition( - this: &mut T, - h: &mut Heap, - def: PrimitiveId, -) -> VisitorResult { - for ¶m in h[def].parameters.clone().iter() { - recursive_parameter_as_variable(this, h, param)?; - } - this.visit_statement(h, h[def].body) -} - -fn recursive_function_definition( - this: &mut T, - h: &mut Heap, - def: FunctionId, -) -> VisitorResult { - for ¶m in h[def].parameters.clone().iter() { - recursive_parameter_as_variable(this, h, param)?; - } - this.visit_statement(h, h[def].body) -} - -fn recursive_variable_declaration( - this: &mut T, - h: &mut Heap, - decl: VariableId, -) -> VisitorResult { - match h[decl].clone() { - Variable::Parameter(decl) => this.visit_parameter_declaration(h, decl.this), - Variable::Local(decl) => this.visit_local_declaration(h, decl.this), - } -} - -fn recursive_statement(this: &mut T, h: &mut Heap, stmt: StatementId) -> VisitorResult { - match h[stmt].clone() { - Statement::Block(stmt) => this.visit_block_statement(h, stmt.this), - Statement::Local(stmt) => this.visit_local_statement(h, stmt.this()), - Statement::Skip(stmt) => this.visit_skip_statement(h, stmt.this), - Statement::Labeled(stmt) => this.visit_labeled_statement(h, stmt.this), - Statement::If(stmt) => this.visit_if_statement(h, stmt.this), - Statement::While(stmt) => this.visit_while_statement(h, stmt.this), - Statement::Break(stmt) => this.visit_break_statement(h, stmt.this), - Statement::Continue(stmt) => this.visit_continue_statement(h, stmt.this), - Statement::Synchronous(stmt) => this.visit_synchronous_statement(h, stmt.this), - Statement::Return(stmt) => this.visit_return_statement(h, stmt.this), - Statement::Assert(stmt) => this.visit_assert_statement(h, stmt.this), - Statement::Goto(stmt) => this.visit_goto_statement(h, stmt.this), - Statement::New(stmt) => this.visit_new_statement(h, stmt.this), - Statement::Put(stmt) => this.visit_put_statement(h, stmt.this), - Statement::Expression(stmt) => this.visit_expression_statement(h, stmt.this), - Statement::EndSynchronous(_) | Statement::EndWhile(_) | Statement::EndIf(_) => { - unreachable!() // pseudo-statement +impl Parser { + pub fn new() -> Self { + Parser{ + heap: Heap::new(), + modules: Vec::new(), + module_lookup: HashMap::new() } } -} - -fn recursive_block_statement( - this: &mut T, - h: &mut Heap, - block: BlockStatementId, -) -> VisitorResult { - for &local in h[block].locals.clone().iter() { - recursive_local_as_variable(this, h, local)?; - } - for &stmt in h[block].statements.clone().iter() { - this.visit_statement(h, stmt)?; - } - Ok(()) -} - -fn recursive_local_statement( - this: &mut T, - h: &mut Heap, - stmt: LocalStatementId, -) -> VisitorResult { - match h[stmt].clone() { - LocalStatement::Channel(stmt) => this.visit_channel_statement(h, stmt.this), - LocalStatement::Memory(stmt) => this.visit_memory_statement(h, stmt.this), - } -} - -fn recursive_memory_statement( - this: &mut T, - h: &mut Heap, - stmt: MemoryStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].initial) -} - -fn recursive_labeled_statement( - this: &mut T, - h: &mut Heap, - stmt: LabeledStatementId, -) -> VisitorResult { - this.visit_statement(h, h[stmt].body) -} - -fn recursive_if_statement( - this: &mut T, - h: &mut Heap, - stmt: IfStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].test)?; - this.visit_statement(h, h[stmt].true_body)?; - this.visit_statement(h, h[stmt].false_body) -} - -fn recursive_while_statement( - this: &mut T, - h: &mut Heap, - stmt: WhileStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].test)?; - this.visit_statement(h, h[stmt].body) -} - -fn recursive_synchronous_statement( - this: &mut T, - h: &mut Heap, - stmt: SynchronousStatementId, -) -> VisitorResult { - for ¶m in h[stmt].parameters.clone().iter() { - recursive_parameter_as_variable(this, h, param)?; - } - this.visit_statement(h, h[stmt].body) -} - -fn recursive_return_statement( - this: &mut T, - h: &mut Heap, - stmt: ReturnStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].expression) -} - -fn recursive_assert_statement( - this: &mut T, - h: &mut Heap, - stmt: AssertStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].expression) -} -fn recursive_new_statement( - this: &mut T, - h: &mut Heap, - stmt: NewStatementId, -) -> VisitorResult { - recursive_call_expression_as_expression(this, h, h[stmt].expression) -} - -fn recursive_put_statement( - this: &mut T, - h: &mut Heap, - stmt: PutStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].port)?; - this.visit_expression(h, h[stmt].message) -} - -fn recursive_expression_statement( - this: &mut T, - h: &mut Heap, - stmt: ExpressionStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].expression) -} - -fn recursive_expression( - this: &mut T, - h: &mut Heap, - expr: ExpressionId, -) -> VisitorResult { - match h[expr].clone() { - Expression::Assignment(expr) => this.visit_assignment_expression(h, expr.this), - Expression::Conditional(expr) => this.visit_conditional_expression(h, expr.this), - Expression::Binary(expr) => this.visit_binary_expression(h, expr.this), - Expression::Unary(expr) => this.visit_unary_expression(h, expr.this), - Expression::Indexing(expr) => this.visit_indexing_expression(h, expr.this), - Expression::Slicing(expr) => this.visit_slicing_expression(h, expr.this), - Expression::Select(expr) => this.visit_select_expression(h, expr.this), - Expression::Array(expr) => this.visit_array_expression(h, expr.this), - Expression::Constant(expr) => this.visit_constant_expression(h, expr.this), - Expression::Call(expr) => this.visit_call_expression(h, expr.this), - Expression::Variable(expr) => this.visit_variable_expression(h, expr.this), + // TODO: @fix, temporary implementation to keep code compilable + pub fn new_with_source(source: InputSource) -> Result { + let mut parser = Parser::new(); + parser.feed(source)?; + Ok(parser) } -} -fn recursive_assignment_expression( - this: &mut T, - h: &mut Heap, - expr: AssignmentExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].left)?; - this.visit_expression(h, h[expr].right) -} + pub fn feed(&mut self, mut source: InputSource) -> Result { + // Lex the input source + let mut lex = Lexer::new(&mut source); + let pd = lex.consume_protocol_description(&mut self.heap)?; -fn recursive_conditional_expression( - this: &mut T, - h: &mut Heap, - expr: ConditionalExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].test)?; - this.visit_expression(h, h[expr].true_expression)?; - this.visit_expression(h, h[expr].false_expression) -} + // Seek the module name and version + let root = &self.heap[pd]; + let mut module_name_pos = InputPosition::default(); + let mut module_name = Vec::new(); + let mut module_version_pos = InputPosition::default(); + let mut module_version = None; -fn recursive_binary_expression( - this: &mut T, - h: &mut Heap, - expr: BinaryExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].left)?; - this.visit_expression(h, h[expr].right) -} - -fn recursive_unary_expression( - this: &mut T, - h: &mut Heap, - expr: UnaryExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].expression) -} - -fn recursive_indexing_expression( - this: &mut T, - h: &mut Heap, - expr: IndexingExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].subject)?; - this.visit_expression(h, h[expr].index) -} - -fn recursive_slicing_expression( - this: &mut T, - h: &mut Heap, - expr: SlicingExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].subject)?; - this.visit_expression(h, h[expr].from_index)?; - this.visit_expression(h, h[expr].to_index) -} - -fn recursive_select_expression( - this: &mut T, - h: &mut Heap, - expr: SelectExpressionId, -) -> VisitorResult { - this.visit_expression(h, h[expr].subject) -} - -fn recursive_array_expression( - this: &mut T, - h: &mut Heap, - expr: ArrayExpressionId, -) -> VisitorResult { - for &expr in h[expr].elements.clone().iter() { - this.visit_expression(h, expr)?; - } - Ok(()) -} - -fn recursive_call_expression( - this: &mut T, - h: &mut Heap, - expr: CallExpressionId, -) -> VisitorResult { - for &expr in h[expr].arguments.clone().iter() { - this.visit_expression(h, expr)?; - } - Ok(()) -} - -// ==================== -// Grammar Rules -// ==================== - -struct NestedSynchronousStatements { - illegal: bool, -} - -impl NestedSynchronousStatements { - fn new() -> Self { - NestedSynchronousStatements { illegal: false } - } -} - -impl Visitor for NestedSynchronousStatements { - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_composite_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_function_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - if self.illegal { - return Err(ParseError::new( - h[stmt].position(), - "Illegal nested synchronous statement", - )); - } - self.illegal = true; - recursive_synchronous_statement(self, h, stmt)?; - self.illegal = false; - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct ChannelStatementOccurrences { - illegal: bool, -} - -impl ChannelStatementOccurrences { - fn new() -> Self { - ChannelStatementOccurrences { illegal: false } - } -} - -impl Visitor for ChannelStatementOccurrences { - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_primitive_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_function_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_channel_statement(&mut self, h: &mut Heap, stmt: ChannelStatementId) -> VisitorResult { - if self.illegal { - return Err(ParseError::new(h[stmt].position(), "Illegal channel delcaration")); - } - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct FunctionStatementReturns {} - -impl FunctionStatementReturns { - fn new() -> Self { - FunctionStatementReturns {} - } - fn function_error(&self, position: InputPosition) -> VisitorResult { - Err(ParseError::new(position, "Function definition must return")) - } -} - -impl Visitor for FunctionStatementReturns { - fn visit_component_definition(&mut self, _h: &mut Heap, _def: ComponentId) -> VisitorResult { - Ok(()) - } - fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, block: BlockStatementId) -> VisitorResult { - let len = h[block].statements.len(); - assert!(len > 0); - self.visit_statement(h, h[block].statements[len - 1]) - } - fn visit_skip_statement(&mut self, h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_continue_statement( - &mut self, - h: &mut Heap, - stmt: ContinueStatementId, - ) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_expression_statement( - &mut self, - h: &mut Heap, - stmt: ExpressionStatementId, - ) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct ComponentStatementReturnNew { - illegal_new: bool, - illegal_return: bool, -} - -impl ComponentStatementReturnNew { - fn new() -> Self { - ComponentStatementReturnNew { illegal_new: false, illegal_return: false } - } -} - -impl Visitor for ComponentStatementReturnNew { - fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentId) -> VisitorResult { - assert!(!(self.illegal_new || self.illegal_return)); - self.illegal_return = true; - recursive_component_definition(self, h, def)?; - self.illegal_return = false; - Ok(()) - } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: PrimitiveId) -> VisitorResult { - assert!(!self.illegal_new); - self.illegal_new = true; - recursive_primitive_definition(self, h, def)?; - self.illegal_new = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - assert!(!(self.illegal_new || self.illegal_return)); - self.illegal_new = true; - recursive_function_definition(self, h, def)?; - self.illegal_new = false; - Ok(()) - } - fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { - Ok(()) - } - fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { - if self.illegal_return { - Err(ParseError::new(h[stmt].position, "Component definition must not return")) - } else { - recursive_return_statement(self, h, stmt) - } - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - if self.illegal_new { - Err(ParseError::new( - h[stmt].position, - "Symbol definition contains illegal new statement", - )) - } else { - recursive_new_statement(self, h, stmt) - } - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct CheckBuiltinOccurrences { - legal: bool, -} - -impl CheckBuiltinOccurrences { - fn new() -> Self { - CheckBuiltinOccurrences { legal: false } - } -} - -impl Visitor for CheckBuiltinOccurrences { - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(!self.legal); - self.legal = true; - recursive_synchronous_statement(self, h, stmt)?; - self.legal = false; - Ok(()) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - match h[expr].method { - Method::Get | Method::Fires => { - if !self.legal { - return Err(ParseError::new(h[expr].position, "Illegal built-in occurrence")); - } - } - _ => {} - } - recursive_call_expression(self, h, expr) - } -} - -struct BuildSymbolDeclarations { - declarations: Vec, -} - -impl BuildSymbolDeclarations { - fn new() -> Self { - BuildSymbolDeclarations { declarations: Vec::new() } - } - fn checked_add(&mut self, h: &mut Heap, decl: DeclarationId) -> VisitorResult { - for &old in self.declarations.iter() { - let id = h[decl].identifier(); - if h[id] == h[h[old].identifier()] { - return match h[decl].clone() { - Declaration::Defined(defined) => Err(ParseError::new( - h[defined.definition].position(), - format!("Defined symbol clash: {}", h[id]), - )), - Declaration::Imported(imported) => Err(ParseError::new( - h[imported.import].position(), - format!("Imported symbol clash: {}", h[id]), - )), - }; - } - } - self.declarations.push(decl); - Ok(()) - } -} - -impl Visitor for BuildSymbolDeclarations { - fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { - recursive_protocol_description(self, h, pd)?; - // Move all collected declarations to the protocol description - h[pd].declarations.append(&mut self.declarations); - Ok(()) - } - fn visit_import(&mut self, h: &mut Heap, import: ImportId) -> VisitorResult { - // println!("DEBUG: Warning (at {}:{}), import actually not yet implemented", file!(), line!()); - let vec = library::get_declarations(h, import)?; - // Destructively iterate over the vector - for decl in vec { - self.checked_add(h, decl)?; - } - Ok(()) - } - fn visit_symbol_definition(&mut self, h: &mut Heap, definition: DefinitionId) -> VisitorResult { - let signature = Signature::from_definition(h, definition); - let decl = h - .alloc_defined_declaration(|this| DefinedDeclaration { this, definition, signature }) - .upcast(); - self.checked_add(h, decl)?; - Ok(()) - } -} - -struct LinkCallExpressions { - pd: Option, - composite: bool, - new_statement: bool, -} + for pragma in &root.pragmas { + match &self.heap[*pragma] { + Pragma::Module(module) => { + if !module_name.is_empty() { + return Err( + ParseError2::new_error(&source, module.position, "Double definition of module name in the same file") + .with_postfixed_info(&source, module_name_pos, "Previous definition was here") + ) + } -impl LinkCallExpressions { - fn new() -> Self { - LinkCallExpressions { pd: None, composite: false, new_statement: false } - } - fn get_declaration( - &self, - h: &Heap, - id: SourceIdentifierId, - ) -> Result { - match h[self.pd.unwrap()].get_declaration(h, id.upcast()) { - Some(id) => Ok(id), - None => Err(ParseError::new(h[id].position, "Unresolved method")), - } - } -} + module_name_pos = module.position.clone(); + module_name = module.value.clone(); + }, + Pragma::Version(version) => { + if module_version.is_some() { + return Err( + ParseError2::new_error(&source, version.position, "Double definition of module version") + .with_postfixed_info(&source, module_version_pos, "Previous definition was here") + ) + } -impl Visitor for LinkCallExpressions { - fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { - self.pd = Some(pd); - recursive_protocol_description(self, h, pd)?; - self.pd = None; - Ok(()) - } - fn visit_composite_definition(&mut self, h: &mut Heap, def: CompositeId) -> VisitorResult { - assert!(!self.composite); - self.composite = true; - recursive_composite_definition(self, h, def)?; - self.composite = false; - Ok(()) - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - assert!(self.composite); - assert!(!self.new_statement); - self.new_statement = true; - recursive_new_statement(self, h, stmt)?; - self.new_statement = false; - Ok(()) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - if let Method::Symbolic(id) = h[expr].method { - let decl = self.get_declaration(h, id)?; - if self.new_statement && h[decl].is_function() { - return Err(ParseError::new(h[id].position, "Illegal call expression")); - } - if !self.new_statement && h[decl].is_component() { - return Err(ParseError::new(h[id].position, "Illegal call expression")); + module_version_pos = version.position.clone(); + module_version = Some(version.version); + }, } - // Set the corresponding declaration of the call - h[expr].declaration = Some(decl); } - // A new statement's call expression may have as arguments function calls - let old = self.new_statement; - self.new_statement = false; - recursive_call_expression(self, h, expr)?; - self.new_statement = old; - Ok(()) - } -} - -struct BuildScope { - scope: Option, -} -impl BuildScope { - fn new() -> Self { - BuildScope { scope: None } - } -} - -impl Visitor for BuildScope { - fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { - assert!(self.scope.is_none()); - self.scope = Some(Scope::Definition(def)); - recursive_symbol_definition(self, h, def)?; - self.scope = None; - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - // First store the current scope - h[stmt].parent_scope = self.scope; - // Then move scope down to current block - self.scope = Some(Scope::Block(stmt)); - recursive_block_statement(self, h, stmt)?; - // Move scope back up - self.scope = old; - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - // First store the current scope - h[stmt].parent_scope = self.scope; - // Then move scope down to current sync - self.scope = Some(Scope::Synchronous(stmt)); - recursive_synchronous_statement(self, h, stmt)?; - // Move scope back up - self.scope = old; - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct ResolveVariables { - scope: Option, -} + // Add module to list of modules and prevent naming conflicts + let cur_module_idx = self.modules.len(); + if let Some(prev_module_idx) = self.module_lookup.get(&module_name) { + // Find `#module` statement in other module again + let prev_module = &self.modules[*prev_module_idx]; + let prev_module_pos = self.heap[prev_module.root_id].pragmas + .iter() + .find_map(|p| { + match &self.heap[*p] { + Pragma::Module(module) => Some(module.position.clone()), + _ => None + } + }) + .unwrap_or(InputPosition::default()); -impl ResolveVariables { - fn new() -> Self { - ResolveVariables { scope: None } - } - fn get_variable(&self, h: &Heap, id: SourceIdentifierId) -> Result { - if let Some(var) = self.find_variable(h, id) { - Ok(var) - } else { - Err(ParseError::new(h[id].position, "Unresolved variable")) - } - } - fn find_variable(&self, h: &Heap, id: SourceIdentifierId) -> Option { - ResolveVariables::find_variable_impl(h, self.scope, id) - } - fn find_variable_impl( - h: &Heap, - scope: Option, - id: SourceIdentifierId, - ) -> Option { - if let Some(scope) = scope { - // The order in which we check for variables is important: - // otherwise, two variables with the same name are shadowed. - if let Some(var) = ResolveVariables::find_variable_impl(h, scope.parent_scope(h), id) { - Some(var) + let module_name_msg = if module_name.is_empty() { + format!("a nameless module") } else { - scope.get_variable(h, id) - } - } else { - None - } - } -} - -impl Visitor for ResolveVariables { - fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { - assert!(self.scope.is_none()); - self.scope = Some(Scope::Definition(def)); - recursive_symbol_definition(self, h, def)?; - self.scope = None; - Ok(()) + format!("module '{}'", String::from_utf8_lossy(&module_name)) + }; + + return Err( + ParseError2::new_error(&source, module_name_pos, &format!("Double definition of {} across files", module_name_msg)) + .with_postfixed_info(&prev_module.source, prev_module_pos, "Other definition was here") + ); + } + + self.modules.push(LexedModule{ + source, + module_name: module_name.clone(), + version: module_version, + root_id: pd + }); + self.module_lookup.insert(module_name, cur_module_idx); + Ok(pd) } - fn visit_variable_declaration(&mut self, h: &mut Heap, decl: VariableId) -> VisitorResult { - // This is only called for parameters of definitions and synchronous statements, - // since the local variables of block statements are still empty - // the moment it is traversed. After resolving variables, this - // function is also called for every local variable declaration. - // We want to make sure that the resolved variable is the variable declared itself; - // otherwise, there is some variable defined in the parent scope. This check - // imposes that the order in which find_variable looks is significant! - let id = h[decl].identifier(); - let check_same = self.find_variable(h, id); - if let Some(check_same) = check_same { - if check_same != decl { - return Err(ParseError::new(h[id].position, "Declared variable clash")); + pub fn compile(&mut self) { + // Build module lookup + } + + fn resolve_symbols_and_types(&mut self) -> Result<(), ParseError2> { + // Construct the symbol table to resolve any imports and/or definitions, + // then use the symbol table to actually annotate all of the imports. + // If the type table is constructed correctly then all imports MUST be + // resolvable. + // TODO: Update once namespaced identifiers are implemented + let symbol_table = SymbolTable::new(&self.heap, &self.modules)?; + + // Not pretty, but we need to work around rust's borrowing rules, it is + // totally safe to mutate the contents of an AST element that we are + // not borrowing anywhere else. + // TODO: Maybe directly access heap's members to allow borrowing from + // mutliple members of Heap? Not pretty though... + let mut module_index = 0; + let mut import_index = 0; + loop { + if module_index >= self.modules.len() { + break; } - } - recursive_variable_declaration(self, h, decl) - } - fn visit_memory_statement(&mut self, h: &mut Heap, stmt: MemoryStatementId) -> VisitorResult { - assert!(!self.scope.is_none()); - let var = h[stmt].variable; - let id = h[var].identifier; - // First check whether variable with same identifier is in scope - let check_duplicate = self.find_variable(h, id); - if check_duplicate.is_some() { - return Err(ParseError::new(h[id].position, "Declared variable clash")); - } - // Then check the expression's variables (this should not refer to own variable) - recursive_memory_statement(self, h, stmt)?; - // Finally, we may add the variable to the scope, which is guaranteed to be a block - { - let block = &mut h[self.scope.unwrap().to_block()]; - block.locals.push(var); - } - Ok(()) - } - fn visit_channel_statement(&mut self, h: &mut Heap, stmt: ChannelStatementId) -> VisitorResult { - assert!(!self.scope.is_none()); - // First handle the from variable - { - let var = h[stmt].from; - let id = h[var].identifier; - let check_duplicate = self.find_variable(h, id); - if check_duplicate.is_some() { - return Err(ParseError::new(h[id].position, "Declared variable clash")); - } - let block = &mut h[self.scope.unwrap().to_block()]; - block.locals.push(var); - } - // Then handle the to variable (which may not be the same as the from) - { - let var = h[stmt].to; - let id = h[var].identifier; - let check_duplicate = self.find_variable(h, id); - if check_duplicate.is_some() { - return Err(ParseError::new(h[id].position, "Declared variable clash")); - } - let block = &mut h[self.scope.unwrap().to_block()]; - block.locals.push(var); - } - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - self.scope = Some(Scope::Block(stmt)); - recursive_block_statement(self, h, stmt)?; - self.scope = old; - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - self.scope = Some(Scope::Synchronous(stmt)); - recursive_synchronous_statement(self, h, stmt)?; - self.scope = old; - Ok(()) - } - fn visit_variable_expression( - &mut self, - h: &mut Heap, - expr: VariableExpressionId, - ) -> VisitorResult { - let var = self.get_variable(h, h[expr].identifier)?; - h[expr].declaration = Some(var); - Ok(()) - } -} - -struct UniqueStatementId(StatementId); - -struct LinkStatements { - prev: Option, -} - -impl LinkStatements { - fn new() -> Self { - LinkStatements { prev: None } - } -} - -impl Visitor for LinkStatements { - fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { - assert!(self.prev.is_none()); - recursive_symbol_definition(self, h, def)?; - // Clear out last statement - self.prev = None; - Ok(()) - } - fn visit_statement(&mut self, h: &mut Heap, stmt: StatementId) -> VisitorResult { - if let Some(UniqueStatementId(prev)) = self.prev.take() { - h[prev].link_next(stmt); - } - recursive_statement(self, h, stmt) - } - fn visit_local_statement(&mut self, _h: &mut Heap, stmt: LocalStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - recursive_labeled_statement(self, h, stmt) - } - fn visit_skip_statement(&mut self, _h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { - // We allocate a pseudo-statement, which combines both branches into one next statement - let position = h[stmt].position; - let pseudo = - h.alloc_end_if_statement(|this| EndIfStatement { this, position, next: None }).upcast(); - assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].true_body)?; - if let Some(UniqueStatementId(prev)) = self.prev.take() { - h[prev].link_next(pseudo); - } - assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].false_body)?; - if let Some(UniqueStatementId(prev)) = self.prev.take() { - h[prev].link_next(pseudo); - } - // Use the pseudo-statement as the statement where to update the next pointer - self.prev = Some(UniqueStatementId(pseudo)); - Ok(()) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - // We allocate a pseudo-statement, to which the break statement finds its target - let position = h[stmt].position; - let pseudo = - h.alloc_end_while_statement(|this| EndWhileStatement { this, position, next: None }); - // Update the while's next statement to point to the pseudo-statement - h[stmt].next = Some(pseudo); - assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].body)?; - // The body's next statement loops back to the while statement itself - // Note: continue statements also loop back to the while statement itself - if let Some(UniqueStatementId(prev)) = std::mem::replace(&mut self.prev, None) { - h[prev].link_next(stmt.upcast()); - } - // Use the while statement as the statement where the next pointer is updated - self.prev = Some(UniqueStatementId(pseudo.upcast())); - Ok(()) - } - fn visit_break_statement(&mut self, _h: &mut Heap, _stmt: BreakStatementId) -> VisitorResult { - Ok(()) - } - fn visit_continue_statement( - &mut self, - _h: &mut Heap, - _stmt: ContinueStatementId, - ) -> VisitorResult { - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - // Allocate a pseudo-statement, that is added for helping the evaluator to issue a command - // that marks the end of the synchronous block. Every evaluation has to pause at this - // point, only to resume later when the thread is selected as unique thread to continue. - let position = h[stmt].position; - let pseudo = h - .alloc_end_synchronous_statement(|this| EndSynchronousStatement { - this, - position, - next: None, - }) - .upcast(); - assert!(self.prev.is_none()); - self.visit_statement(h, h[stmt].body)?; - // The body's next statement points to the pseudo element - if let Some(UniqueStatementId(prev)) = std::mem::replace(&mut self.prev, None) { - h[prev].link_next(pseudo); - } - // Use the pseudo-statement as the statement where the next pointer is updated - self.prev = Some(UniqueStatementId(pseudo)); - Ok(()) - } - fn visit_return_statement(&mut self, _h: &mut Heap, _stmt: ReturnStatementId) -> VisitorResult { - Ok(()) - } - fn visit_assert_statement(&mut self, _h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { - Ok(()) - } - fn visit_new_statement(&mut self, _h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_put_statement(&mut self, _h: &mut Heap, stmt: PutStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_expression_statement( - &mut self, - _h: &mut Heap, - stmt: ExpressionStatementId, - ) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct BuildLabels { - block: Option, - sync_enclosure: Option, -} -impl BuildLabels { - fn new() -> Self { - BuildLabels { block: None, sync_enclosure: None } - } -} - -impl Visitor for BuildLabels { - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert_eq!(self.block, h[stmt].parent_block(h)); - let old = self.block; - self.block = Some(stmt); - recursive_block_statement(self, h, stmt)?; - self.block = old; - Ok(()) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - assert!(!self.block.is_none()); - // Store label in current block (on the fly) - h[self.block.unwrap()].labels.push(stmt); - // Update synchronous scope of label - h[stmt].in_sync = self.sync_enclosure; - recursive_labeled_statement(self, h, stmt) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - h[stmt].in_sync = self.sync_enclosure; - recursive_while_statement(self, h, stmt) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(self.sync_enclosure.is_none()); - self.sync_enclosure = Some(stmt); - recursive_synchronous_statement(self, h, stmt)?; - self.sync_enclosure = None; - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -struct ResolveLabels { - block: Option, - while_enclosure: Option, - sync_enclosure: Option, -} - -impl ResolveLabels { - fn new() -> Self { - ResolveLabels { block: None, while_enclosure: None, sync_enclosure: None } - } - fn check_duplicate_impl( - h: &Heap, - block: Option, - stmt: LabeledStatementId, - ) -> VisitorResult { - if let Some(block) = block { - // Checking the parent first is important. Otherwise, labels - // overshadow previously defined labels: and this is illegal! - ResolveLabels::check_duplicate_impl(h, h[block].parent_block(h), stmt)?; - // For the current block, check for a duplicate. - for &other_stmt in h[block].labels.iter() { - if other_stmt == stmt { - continue; - } else { - if h[h[other_stmt].label] == h[h[stmt].label] { - return Err(ParseError::new(h[stmt].position, "Duplicate label")); - } + let module_root_id = self.modules[module_index].root_id; + let import_id = { + let root = &self.heap[module_root_id]; + if import_index >= root.imports.len() { + module_index += 1; + import_index = 0; + continue } - } - } - Ok(()) - } - fn check_duplicate(&self, h: &Heap, stmt: LabeledStatementId) -> VisitorResult { - ResolveLabels::check_duplicate_impl(h, self.block, stmt) - } - fn get_target( - &self, - h: &Heap, - id: SourceIdentifierId, - ) -> Result { - if let Some(stmt) = ResolveLabels::find_target(h, self.block, id) { - Ok(stmt) - } else { - Err(ParseError::new(h[id].position, "Unresolved label")) - } - } - fn find_target( - h: &Heap, - block: Option, - id: SourceIdentifierId, - ) -> Option { - if let Some(block) = block { - // It does not matter in what order we find the labels. - // If there are duplicates: that is checked elsewhere. - for &stmt in h[block].labels.iter() { - if h[h[stmt].label] == h[id] { - return Some(stmt); + root.imports[import_index] + }; + + let import = &mut self.heap[import_id]; + match import { + Import::Module(import) => { + debug_assert!(import.module_id.is_none(), "module import already resolved"); + let target_module_id = symbol_table.resolve_module(&import.module_name) + .expect("module import is resolved by symbol table"); + import.module_id = Some(target_module_id) + }, + Import::Symbols(import) => { + debug_assert!(import.module_id.is_none(), "module of symbol import already resolved"); + let target_module_id = symbol_table.resolve_module(&import.module_name) + .expect("symbol import's module is resolved by symbol table"); + import.module_id = Some(target_module_id); + + for symbol in &mut import.symbols { + debug_assert!(symbol.definition_id.is_none(), "symbol import already resolved"); + let (_, target_definition_id) = symbol_table.resolve_symbol(module_root_id, &symbol.alias) + .expect("symbol import is resolved by symbol table") + .as_definition() + .expect("symbol import does not resolve to namespace symbol"); + symbol.definition_id = Some(target_definition_id); + } } } - if let Some(stmt) = ResolveLabels::find_target(h, h[block].parent_block(h), id) { - return Some(stmt); - } - } - None - } -} - -impl Visitor for ResolveLabels { - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert_eq!(self.block, h[stmt].parent_block(h)); - let old = self.block; - self.block = Some(stmt); - recursive_block_statement(self, h, stmt)?; - self.block = old; - Ok(()) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - assert!(!self.block.is_none()); - self.check_duplicate(h, stmt)?; - recursive_labeled_statement(self, h, stmt) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - let old = self.while_enclosure; - self.while_enclosure = Some(stmt); - recursive_while_statement(self, h, stmt)?; - self.while_enclosure = old; - Ok(()) - } - fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { - let the_while; - if let Some(label) = h[stmt].label { - let target = self.get_target(h, label)?; - let target = &h[h[target].body]; - if !target.is_while() { - return Err(ParseError::new( - h[stmt].position, - "Illegal break: target not a while statement", - )); - } - the_while = target.as_while(); - // TODO: check if break is nested under while - } else { - if self.while_enclosure.is_none() { - return Err(ParseError::new( - h[stmt].position, - "Illegal break: no surrounding while statement", - )); - } - the_while = &h[self.while_enclosure.unwrap()]; - // break is always nested under while, by recursive vistor - } - if the_while.in_sync != self.sync_enclosure { - return Err(ParseError::new( - h[stmt].position, - "Illegal break: synchronous statement escape", - )); - } - h[stmt].target = the_while.next; - Ok(()) - } - fn visit_continue_statement( - &mut self, - h: &mut Heap, - stmt: ContinueStatementId, - ) -> VisitorResult { - let the_while; - if let Some(label) = h[stmt].label { - let target = self.get_target(h, label)?; - let target = &h[h[target].body]; - if !target.is_while() { - return Err(ParseError::new( - h[stmt].position, - "Illegal continue: target not a while statement", - )); - } - the_while = target.as_while(); - // TODO: check if continue is nested under while - } else { - if self.while_enclosure.is_none() { - return Err(ParseError::new( - h[stmt].position, - "Illegal continue: no surrounding while statement", - )); - } - the_while = &h[self.while_enclosure.unwrap()]; - // continue is always nested under while, by recursive vistor - } - if the_while.in_sync != self.sync_enclosure { - return Err(ParseError::new( - h[stmt].position, - "Illegal continue: synchronous statement escape", - )); - } - h[stmt].target = Some(the_while.this); - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(self.sync_enclosure.is_none()); - self.sync_enclosure = Some(stmt); - recursive_synchronous_statement(self, h, stmt)?; - self.sync_enclosure = None; - Ok(()) - } - fn visit_goto_statement(&mut self, h: &mut Heap, stmt: GotoStatementId) -> VisitorResult { - let target = self.get_target(h, h[stmt].label)?; - if h[target].in_sync != self.sync_enclosure { - return Err(ParseError::new( - h[stmt].position, - "Illegal goto: synchronous statement escape", - )); } - h[stmt].target = Some(target); - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} -struct AssignableExpressions { - assignable: bool, -} + // All imports in the AST are now annotated. We now use the symbol table + // to construct the type table. -impl AssignableExpressions { - fn new() -> Self { - AssignableExpressions { assignable: false } - } - fn error(&self, position: InputPosition) -> VisitorResult { - Err(ParseError::new(position, "Unassignable expression")) - } -} -impl Visitor for AssignableExpressions { - fn visit_assignment_expression( - &mut self, - h: &mut Heap, - expr: AssignmentExpressionId, - ) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - self.assignable = true; - self.visit_expression(h, h[expr].left)?; - self.assignable = false; - self.visit_expression(h, h[expr].right) - } - } - fn visit_conditional_expression( - &mut self, - h: &mut Heap, - expr: ConditionalExpressionId, - ) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - recursive_conditional_expression(self, h, expr) - } - } - fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - recursive_binary_expression(self, h, expr) - } - } - fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - match h[expr].operation { - UnaryOperation::PostDecrement - | UnaryOperation::PreDecrement - | UnaryOperation::PostIncrement - | UnaryOperation::PreIncrement => { - self.assignable = true; - recursive_unary_expression(self, h, expr)?; - self.assignable = false; - Ok(()) - } - _ => recursive_unary_expression(self, h, expr), - } - } - } - fn visit_indexing_expression( - &mut self, - h: &mut Heap, - expr: IndexingExpressionId, - ) -> VisitorResult { - let old = self.assignable; - self.assignable = false; - recursive_indexing_expression(self, h, expr)?; - self.assignable = old; - Ok(()) - } - fn visit_slicing_expression( - &mut self, - h: &mut Heap, - expr: SlicingExpressionId, - ) -> VisitorResult { - let old = self.assignable; - self.assignable = false; - recursive_slicing_expression(self, h, expr)?; - self.assignable = old; - Ok(()) - } - fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { - if h[expr].field.is_length() && self.assignable { - return self.error(h[expr].position); - } - let old = self.assignable; - self.assignable = false; - recursive_select_expression(self, h, expr)?; - self.assignable = old; Ok(()) } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - recursive_array_expression(self, h, expr) - } - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - recursive_call_expression(self, h, expr) - } - } - fn visit_constant_expression( - &mut self, - h: &mut Heap, - expr: ConstantExpressionId, - ) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - Ok(()) - } - } - fn visit_variable_expression( - &mut self, - _h: &mut Heap, - _expr: VariableExpressionId, - ) -> VisitorResult { - Ok(()) - } -} - -struct IndexableExpressions { - indexable: bool, -} -impl IndexableExpressions { - fn new() -> Self { - IndexableExpressions { indexable: false } - } - fn error(&self, position: InputPosition) -> VisitorResult { - Err(ParseError::new(position, "Unindexable expression")) - } -} + // TODO: @fix, temporary impl to keep code compilable + pub fn parse(&mut self) -> Result { + assert_eq!(self.modules.len(), 1, "Fix meeeee"); + let root_id = self.modules[0].root_id; + println!("DEBUG: With root id {:?}\nSource: {}", root_id, String::from_utf8_lossy(&self.modules[0].source.input)); -impl Visitor for IndexableExpressions { - fn visit_assignment_expression( - &mut self, - h: &mut Heap, - expr: AssignmentExpressionId, - ) -> VisitorResult { - if self.indexable { - self.error(h[expr].position) - } else { - recursive_assignment_expression(self, h, expr) - } - } - fn visit_conditional_expression( - &mut self, - h: &mut Heap, - expr: ConditionalExpressionId, - ) -> VisitorResult { - let old = self.indexable; - self.indexable = false; - self.visit_expression(h, h[expr].test)?; - self.indexable = old; - self.visit_expression(h, h[expr].true_expression)?; - self.visit_expression(h, h[expr].false_expression) - } - fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { - if self.indexable && h[expr].operation != BinaryOperator::Concatenate { - self.error(h[expr].position) - } else { - recursive_binary_expression(self, h, expr) - } - } - fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { - if self.indexable { - self.error(h[expr].position) - } else { - recursive_unary_expression(self, h, expr) + if let Err((position, message)) = Self::parse_inner(&mut self.heap, root_id) { + return Err(ParseError2::new_error(&self.modules[0].source, position, &message)) } + Ok(root_id) } - fn visit_indexing_expression( - &mut self, - h: &mut Heap, - expr: IndexingExpressionId, - ) -> VisitorResult { - let old = self.indexable; - self.indexable = true; - self.visit_expression(h, h[expr].subject)?; - self.indexable = false; - self.visit_expression(h, h[expr].index)?; - self.indexable = old; - Ok(()) - } - fn visit_slicing_expression( - &mut self, - h: &mut Heap, - expr: SlicingExpressionId, - ) -> VisitorResult { - let old = self.indexable; - self.indexable = true; - self.visit_expression(h, h[expr].subject)?; - self.indexable = false; - self.visit_expression(h, h[expr].from_index)?; - self.visit_expression(h, h[expr].to_index)?; - self.indexable = old; - Ok(()) - } - fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { - let old = self.indexable; - self.indexable = false; - recursive_select_expression(self, h, expr)?; - self.indexable = old; - Ok(()) - } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - let old = self.indexable; - self.indexable = false; - recursive_array_expression(self, h, expr)?; - self.indexable = old; - Ok(()) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - let old = self.indexable; - self.indexable = false; - recursive_call_expression(self, h, expr)?; - self.indexable = old; - Ok(()) - } - fn visit_constant_expression( - &mut self, - h: &mut Heap, - expr: ConstantExpressionId, - ) -> VisitorResult { - if self.indexable { - self.error(h[expr].position) - } else { - Ok(()) - } - } -} - -struct SelectableExpressions { - selectable: bool, -} - -impl SelectableExpressions { - fn new() -> Self { - SelectableExpressions { selectable: false } - } - fn error(&self, position: InputPosition) -> VisitorResult { - Err(ParseError::new(position, "Unselectable expression")) - } -} -impl Visitor for SelectableExpressions { - fn visit_assignment_expression( - &mut self, - h: &mut Heap, - expr: AssignmentExpressionId, - ) -> VisitorResult { - // left-hand side of assignment can be skipped - let old = self.selectable; - self.selectable = false; - self.visit_expression(h, h[expr].right)?; - self.selectable = old; - Ok(()) - } - fn visit_conditional_expression( - &mut self, - h: &mut Heap, - expr: ConditionalExpressionId, - ) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - self.visit_expression(h, h[expr].test)?; - self.selectable = old; - self.visit_expression(h, h[expr].true_expression)?; - self.visit_expression(h, h[expr].false_expression) - } - fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { - if self.selectable && h[expr].operation != BinaryOperator::Concatenate { - self.error(h[expr].position) - } else { - recursive_binary_expression(self, h, expr) - } - } - fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { - if self.selectable { - self.error(h[expr].position) - } else { - recursive_unary_expression(self, h, expr) - } - } - fn visit_indexing_expression( - &mut self, - h: &mut Heap, - expr: IndexingExpressionId, - ) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_indexing_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } - fn visit_slicing_expression( - &mut self, - h: &mut Heap, - expr: SlicingExpressionId, - ) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_slicing_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } - fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_select_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_array_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_call_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } - fn visit_constant_expression( - &mut self, - h: &mut Heap, - expr: ConstantExpressionId, - ) -> VisitorResult { - if self.selectable { - self.error(h[expr].position) - } else { - Ok(()) - } - } -} - -pub struct Parser<'a> { - source: &'a mut InputSource, -} - -impl<'a> Parser<'a> { - pub fn new(source: &'a mut InputSource) -> Self { - Parser { source } - } - pub fn parse(&mut self, h: &mut Heap) -> Result { - let mut lex = Lexer::new(self.source); - let pd = lex.consume_protocol_description(h)?; + pub fn parse_inner(h: &mut Heap, pd: RootId) -> VisitorResult { NestedSynchronousStatements::new().visit_protocol_description(h, pd)?; ChannelStatementOccurrences::new().visit_protocol_description(h, pd)?; FunctionStatementReturns::new().visit_protocol_description(h, pd)?; @@ -1817,7 +213,8 @@ impl<'a> Parser<'a> { AssignableExpressions::new().visit_protocol_description(h, pd)?; IndexableExpressions::new().visit_protocol_description(h, pd)?; SelectableExpressions::new().visit_protocol_description(h, pd)?; - Ok(pd) + + Ok(()) } } @@ -1835,16 +232,14 @@ mod tests { let resource = resource.expect("read testdata filepath"); // println!(" * running: {}", &resource); let path = Path::new(&resource); - let mut heap = Heap::new(); let mut source = InputSource::from_file(&path).unwrap(); // println!("DEBUG -- input:\n{}", String::from_utf8_lossy(&source.input)); - let mut parser = Parser::new(&mut source); - match parser.parse(&mut heap) { + let mut parser = Parser::new_with_source(source).expect("parse source"); + match parser.parse() { Ok(_) => {} Err(err) => { println!(" > file: {}", &resource); - println!("{}", err.display(&source)); - println!("{:?}", err); + println!("{}", err); assert!(false); } } @@ -1857,12 +252,10 @@ mod tests { let resource = resource.expect("read testdata filepath"); let path = Path::new(&resource); let expect = path.with_extension("txt"); - let mut heap = Heap::new(); let mut source = InputSource::from_file(&path).unwrap(); - let mut parser = Parser::new(&mut source); - match parser.parse(&mut heap) { + let mut parser = Parser::new_with_source(source).expect("construct parser"); + match parser.parse() { Ok(pd) => { - println!("{:?}", heap[pd]); println!("Expected parse error:"); let mut cev: Vec = Vec::new(); @@ -1872,18 +265,15 @@ mod tests { assert!(false); } Err(err) => { - println!("{:?}", err); - - let mut vec: Vec = Vec::new(); - err.write(&source, &mut vec).unwrap(); - println!("{}", String::from_utf8_lossy(&vec)); + let expected = format!("{}", err); + println!("{}", &expected); let mut cev: Vec = Vec::new(); let mut f = File::open(expect).unwrap(); f.read_to_end(&mut cev).unwrap(); println!("{}", String::from_utf8_lossy(&cev)); - assert_eq!(vec, cev); + assert_eq!(expected.as_bytes(), cev); } } } @@ -1894,9 +284,8 @@ mod tests { for resource in TestFileIter::new("testdata/parser/counterexamples", "pdl") { let resource = resource.expect("read testdata filepath"); let path = Path::new(&resource); - let mut heap = Heap::new(); - let mut source = InputSource::from_file(&path).unwrap(); - let mut parser = Parser::new(&mut source); + let source = InputSource::from_file(&path).unwrap(); + let mut parser = Parser::new_with_source(source).expect("construct parser"); fn print_header(s: &str) { println!("{}", "=".repeat(80)); @@ -1904,20 +293,17 @@ mod tests { println!("{}", "=".repeat(80)); } - match parser.parse(&mut heap) { + match parser.parse() { Ok(parsed) => { print_header(&resource); - println!("\n SUCCESS\n\n --- source:\n{}", String::from_utf8_lossy(&source.input)); + println!("\n SUCCESS\n\n --- source:\n{}", String::from_utf8_lossy(&parser.modules[0].source.input)); }, Err(err) => { print_header(&resource); - - let mut err_buf = Vec::new(); - err.write(&source, &mut err_buf); println!( "\n FAILURE\n\n --- error:\n{}\n --- source:\n{}", - String::from_utf8_lossy(&err_buf), - String::from_utf8_lossy(&source.input) + err, + String::from_utf8_lossy(&parser.modules[0].source.input) ) } } diff --git a/src/protocol/parser/shallow_visitor.rs b/src/protocol/parser/shallow_visitor.rs index 87ffebf5cc573b7a757ee6a7ad99f70cf736f9a3..f7082890de7f68e8716ffd0f72bb971d020a85d3 100644 --- a/src/protocol/parser/shallow_visitor.rs +++ b/src/protocol/parser/shallow_visitor.rs @@ -6,5 +6,7 @@ type Unit = (); type VisitorResult = Result; trait ShallowVisitor: Sized { - fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> + fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> VisitorResult { + + } } \ No newline at end of file diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..9902584d39451161353cf41d5ace4cf7828e6f0e --- /dev/null +++ b/src/protocol/parser/symbol_table.rs @@ -0,0 +1,338 @@ +use crate::protocol::ast::*; +use crate::protocol::inputsource::*; + +use std::collections::{HashMap, hash_map::Entry}; +use crate::protocol::parser::LexedModule; + +#[derive(PartialEq, Eq, Hash)] +struct SymbolKey { + module_id: RootId, + symbol_name: Vec, +} + +pub(crate) enum Symbol { + Namespace(RootId), + Definition((RootId, DefinitionId)), +} + +pub(crate) struct SymbolValue { + // Position refers to the origin of the symbol definition (i.e. the module's + // RootId that is in the key being used to lookup this value) + position: InputPosition, + symbol: Symbol, +} + +impl SymbolValue { + pub(crate) fn as_namespace(&self) -> Option { + match &self.symbol { + Symbol::Namespace(root_id) => Some(*root_id), + _ => None, + } + } + + pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> { + match &self.symbol { + Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)), + _ => None, + } + } +} +/// `SymbolTable` is responsible for two parts of the parsing process: firstly +/// it ensures that there are no clashing symbol definitions within each file, +/// and secondly it will resolve all symbols within a module to their +/// appropriate definitions (in case of enums, functions, etc.) and namespaces +/// (currently only external modules can act as namespaces). If a symbol clashes +/// or if a symbol cannot be resolved this will be an error. +/// +/// Within the compilation process the symbol table is responsible for resolving +/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate +/// definition (i.e. not namespaces; as the language has no way to use +/// namespaces except for using them in namespaced identifiers). +// TODO: Maybe allow namespaced-aliased imports. It is currently not possible +// to express the following: +// import Module.Submodule as ModSub +// import SubMod::{Symbol} +pub(crate) struct SymbolTable { + // Lookup from module name (not any aliases) to the root id + module_lookup: HashMap, RootId>, + // Lookup from within a module, to a particular imported (potentially + // aliased) or defined symbol. Basically speaking: if the source code of a + // module contains correctly imported/defined symbols, then this lookup + // will always return the corresponding definition + symbol_lookup: HashMap, +} + +impl SymbolTable { + pub(crate) fn new(heap: &Heap, modules: &[LexedModule]) -> Result { + // Sanity check + if cfg!(debug_assertions) { + for (index, module) in modules.iter().enumerate() { + debug_assert_eq!( + index, module.root_id.0.index as usize, + "module RootId does not correspond to LexedModule index" + ) + } + } + + // Preparation: create a lookup from module name to root id. This does + // not take aliasing into account. + let mut module_lookup = HashMap::with_capacity(modules.len()); + for module in modules { + // TODO: Maybe put duplicate module name checking here? + // TODO: @string + module_lookup.insert(module.module_name.clone(), module.root_id); + } + + // Preparation: determine total number of imports we will be inserting + // into the lookup table. We could just iterate over the arena, but then + // we don't know the source file the import belongs to. + let mut lookup_reserve_size = 0; + for module in modules { + let module_root = &heap[module.root_id]; + for import_id in &module_root.imports { + match &heap[*import_id] { + Import::Module(_) => lookup_reserve_size += 1, + Import::Symbols(import) => { + if import.symbols.is_empty() { + // Add all symbols from the other module + match module_lookup.get(&import.module_name) { + Some(target_module_id) => { + lookup_reserve_size += heap[*target_module_id].definitions.len() + }, + None => { + return Err( + ParseError2::new_error(&module.source, import.position, "Cannot resolve module") + ); + } + } + } + } + } + } + } + + let mut table = Self{ + module_lookup, + symbol_lookup: HashMap::with_capacity(lookup_reserve_size) + }; + + // First pass: we go through all of the modules and add lookups to + // symbols that are defined within that module. Cross-module imports are + // not yet resolved + for module in modules { + let root = &heap[module.root_id]; + for definition_id in &root.definitions { + let definition = &heap[*definition_id]; + let identifier = definition.identifier(); + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, identifier.position, &identifier.value, + module.root_id, *definition_id + ) { + return Err( + ParseError2::new_error(&module.source, definition.position(), "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ) + } + } + } + + // Second pass: now that we can find symbols in modules, we can resolve + // all imports (if they're correct, that is) + for module in modules { + let root = &heap[module.root_id]; + for import_id in &root.imports { + let import = &heap[*import_id]; + match import { + Import::Module(import) => { + // Find the module using its name + let target_root_id = table.resolve_module(&import.module_name); + if target_root_id.is_none() { + return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module")); + } + let target_root_id = target_root_id.unwrap(); + if target_root_id == module.root_id { + return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); + } + + // Add the target module under its alias + if let Err(previous_position) = table.add_namespace_symbol( + module.root_id, import.position, + &import.alias, target_root_id + ) { + return Err( + ParseError2::new_error(&module.source, import.position, "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ); + } + }, + Import::Symbols(import) => { + // Find the target module using its name + let target_root_id = table.resolve_module(&import.module_name); + if target_root_id.is_none() { + return Err(ParseError2::new_error(&module.source, import.position, "Could not resolve module of symbol imports")); + } + let target_root_id = target_root_id.unwrap(); + if target_root_id == module.root_id { + return Err(ParseError2::new_error(&module.source, import.position, "Illegal import of self")); + } + + // Determine which symbols to import + if import.symbols.is_empty() { + // Import of all symbols, not using any aliases + for definition_id in &heap[target_root_id].definitions { + let definition = &heap[*definition_id]; + let identifier = definition.identifier(); + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, import.position, &identifier.value, + target_root_id, *definition_id + ) { + return Err( + ParseError2::new_error( + &module.source, import.position, + &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value)) + ) + .with_postfixed_info( + &modules[target_root_id.0.index as usize].source, + definition.position(), + "The imported symbol is defined here" + ) + .with_postfixed_info( + &module.source, previous_position, "And is previously defined here" + ) + ) + } + } + } else { + // Import of specific symbols, optionally using aliases + for symbol in &import.symbols { + // Because we have already added per-module definitions, we can use + // the table to lookup this particular symbol. Note: within a single + // module a namespace-import and a symbol-import may not collide. + // Hence per-module symbols are unique. + // However: if we import a symbol from another module, we don't want + // to "import a module's imported symbol". And so if we do find + // a symbol match, we need to make sure it is a definition from + // within that module by checking `source_root_id == target_root_id` + let target_symbol = table.resolve_symbol(target_root_id, &symbol.name); + let symbol_definition_id = match target_symbol { + Some(target_symbol) => { + match target_symbol.symbol { + Symbol::Definition((symbol_root_id, symbol_definition_id)) => { + if symbol_root_id == target_root_id { + Some(symbol_definition_id) + } else { + // This is imported within the target module, and not + // defined within the target module + None + } + }, + Symbol::Namespace(_) => { + // We don't import a module's "module import" + None + } + } + }, + None => None + }; + + if symbol_definition_id.is_none() { + return Err( + ParseError2::new_error(&module.source, symbol.position, "Could not resolve symbol") + ) + } + let symbol_definition_id = symbol_definition_id.unwrap(); + + if let Err(previous_position) = table.add_definition_symbol( + module.root_id, symbol.position, &symbol.alias, + target_root_id, symbol_definition_id + ) { + return Err( + ParseError2::new_error(&module.source, symbol.position, "Symbol is multiply defined") + .with_postfixed_info(&module.source, previous_position, "Previous definition was here") + ) + } + } + } + } + } + } + } + + debug_assert_eq!( + table.symbol_lookup.len(), lookup_reserve_size, + "miscalculated reserved size for symbol lookup table" + ); + Ok(table) + } + + /// Resolves a module by its defined name + pub(crate) fn resolve_module(&self, identifier: &Vec) -> Option { + self.module_lookup.get(identifier).map(|v| *v) + } + + /// Resolves a symbol within a particular module, indicated by its RootId, + /// with a single non-namespaced identifier + pub(crate) fn resolve_symbol(&self, within_module_id: RootId, identifier: &Vec) -> Option<&SymbolValue> { + self.symbol_lookup.get(&SymbolKey{ module_id: within_module_id, symbol_name: identifier.clone() }) + } + + /// Resolves a namespaced symbol. It will try to go as far as possible in + /// actually finding a definition or a namespace. So a namespace might be + /// resolved, after it which it finds an actual definition. It may be that + /// the namespaced identifier has more elements that should be checked + /// (i.e. an enum variant, or simply an erroneous instance of too many + /// chained identifiers). This function will return None if nothing could be + /// resolved at all. + pub(crate) fn resolve_namespaced_symbol(&self, _within_module_id: RootId) { + todo!("implement") + } + + /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was + /// inserted. If the symbol already exists then `Err` will be returned + /// together with the previous definition's source position (in the origin + /// module's source file). + // Note: I would love to return a reference to the value, but Rust is + // preventing me from doing so... + fn add_namespace_symbol( + &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, target_module_id: RootId + ) -> Result<(), InputPosition> { + let key = SymbolKey{ + module_id: origin_module_id, + symbol_name: symbol_name.clone() + }; + match self.symbol_lookup.entry(key) { + Entry::Occupied(o) => Err(o.get().position), + Entry::Vacant(v) => { + v.insert(SymbolValue{ + position: origin_position, + symbol: Symbol::Namespace(target_module_id) + }); + Ok(()) + } + } + } + + /// Attempts to add a definition symbol. Returns `Ok` if the symbol was + /// inserted. If the symbol already exists then `Err` will be returned + /// together with the previous definition's source position (in the origin + /// module's source file). + fn add_definition_symbol( + &mut self, origin_module_id: RootId, origin_position: InputPosition, symbol_name: &Vec, + target_module_id: RootId, target_definition_id: DefinitionId, + ) -> Result<(), InputPosition> { + let key = SymbolKey{ + module_id: origin_module_id, + symbol_name: symbol_name.clone() + }; + match self.symbol_lookup.entry(key) { + Entry::Occupied(o) => Err(o.get().position), + Entry::Vacant(v) => { + v.insert(SymbolValue { + position: origin_position, + symbol: Symbol::Definition((target_module_id, target_definition_id)) + }); + Ok(()) + } + } + } +} \ No newline at end of file diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs new file mode 100644 index 0000000000000000000000000000000000000000..ef4fddc7df0df7a303ced1f52a78c9f3c3a98053 --- /dev/null +++ b/src/protocol/parser/type_table.rs @@ -0,0 +1,64 @@ +// TODO: @fix PrimitiveType for enums/unions +use crate::protocol::ast::*; +use crate::protocol::parser::symbol_table::*; + +use std::collections::HashMap; + + +enum DefinedType { + Enum(EnumType), + Union(UnionType), + Struct(StructType), + Function(FunctionType), + Component(ComponentType) +} + +// TODO: Also support maximum u64 value +struct EnumVariant { + identifier: Identifier, + value: i64, +} + +/// `EnumType` is the classical C/C++ enum type. It has various variants with +/// an assigned integer value. The integer values may be user-defined, +/// compiler-defined, or a mix of the two. If a user assigns the same enum +/// value multiple times, we assume the user is an expert and we consider both +/// variants to be equal to one another. +struct EnumType { + definition: DefinitionId, + variants: Vec, + representation: PrimitiveType, +} + +struct UnionVariant { + identifier: Identifier, + embedded_type: Option, + tag_value: i64, +} + +struct UnionType { + definition: DefinitionId, + variants: Vec, + tag_representation: PrimitiveType +} + +struct StructMemberType { + identifier: Identifier, + +} + +struct StructType { + +} + +struct FunctionType { + +} + +struct ComponentType { + +} + +struct TypeTable { + lookup: HashMap, +} \ No newline at end of file