diff --git a/src/collections/scoped_buffer.rs b/src/collections/scoped_buffer.rs
index 5b8c08eafe6bb2756822b73b55c456787ad94d10..051d3618ffa00d6874b4c1cf353a5f6140ed53ea 100644
--- a/src/collections/scoped_buffer.rs
+++ b/src/collections/scoped_buffer.rs
@@ -101,7 +101,7 @@ impl std::ops::Index for ScopedSection {
 }
 
 #[cfg(debug_assertions)]
-impl Drop for ScopedBuffer {
+impl Drop for ScopedSection {
     fn drop(&mut self) {
         // Make sure that the data was actually taken out of the scoped section
         let vec = unsafe{&*self.inner};
diff --git a/src/collections/string_pool.rs b/src/collections/string_pool.rs
index 91de3c466cf5cfedb0efcb9c6194897c51bdad6a..c700c15b894d7824fa73bd03baf135b4d884ac6e 100644
--- a/src/collections/string_pool.rs
+++ b/src/collections/string_pool.rs
@@ -1,9 +1,8 @@
 use std::ptr::null_mut;
-use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
 use std::marker::PhantomData;
 
-const SLAB_SIZE: usize = u16::max_value() as usize;
+const SLAB_SIZE: usize = u16::MAX as usize;
 
 #[derive(Clone)]
 pub struct StringRef<'a> {
@@ -39,18 +38,18 @@ impl<'a> StringRef<'a> {
     }
 }
 
-impl PartialEq for StringRef {
+impl PartialEq for StringRef<'_> {
     fn eq(&self, other: &StringRef) -> bool {
         self.as_str() == other.as_str()
     }
 }
 
-impl Eq for StringRef {}
+impl Eq for StringRef<'_> {}
 
-impl Hash for StringRef {
+impl Hash for StringRef<'_> {
     fn hash<H: Hasher>(&self, state: &mut H) {
         unsafe{
-            state.write(std::slice::from_raw_parts(self.data, self.length));
+            state.write(self.as_bytes());
         }
     }
 }
diff --git a/src/ffi/pseudo_socket_api.rs b/src/ffi/pseudo_socket_api.rs
index cde18dae8d383a7c9b482f409945fcb244f7c694..6ec03a89ff0c7cfa1f9e082bf1f37f34e4d8d2b2 100644
--- a/src/ffi/pseudo_socket_api.rs
+++ b/src/ffi/pseudo_socket_api.rs
@@ -61,6 +61,9 @@ impl FdAllocator {
 }
 lazy_static::lazy_static! {
     static ref CC_MAP: RwLock = Default::default();
+    static ref TRIVIAL_PD: Arc<ProtocolDescription> = {
+        Arc::new(ProtocolDescription::parse(b"").unwrap())
+    };
 }
 impl ConnectorComplex {
     fn try_become_connected(&mut self) {
@@ -81,11 +84,12 @@ pub extern "C" fn rw_socket(_domain: c_int, _type: c_int, _protocol: c_int) -> c_int {
     // get writer lock
     let mut w = if let Ok(w) = CC_MAP.write() { w } else { return RW_LOCK_POISONED };
+    let fd = w.fd_allocator.alloc();
     let cc = ConnectorComplex {
         connector: Connector::new(
             Box::new(crate::DummyLogger),
-            crate::TRIVIAL_PD.clone(),
+            TRIVIAL_PD.clone(),
             Connector::random_id(),
         ),
         phased: ConnectorComplexPhased::Setup { local: None, peer: None },
diff --git a/src/lib.rs b/src/lib.rs
index becc35e33199c98bd851df96cde81cbbbf21b786..68d205d6b3fb5693de1ceef562f4dcafa2b18af2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,13 +7,12 @@ mod runtime;
 mod collections;
 
 pub use common::{ConnectorId, EndpointPolarity, Payload, Polarity, PortId};
-pub use protocol::{ProtocolDescription, TRIVIAL_PD};
+pub use protocol::ProtocolDescription;
 pub use runtime::{error, Connector, DummyLogger, FileLogger, VecLogger};
 
 // TODO: Remove when not benchmarking
-pub use protocol::inputsource::InputSource;
+pub use protocol::input_source::InputSource;
 pub use protocol::ast::Heap;
-pub use protocol::lexer::Lexer;
 
 #[cfg(feature = "ffi")]
 pub mod ffi;
diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs
index 65c6949bccdb2b7497f69c191a181ce9d98c5c73..e1bfda4d72a28b6fb7a7ab8be073cd7a7efc199a 100644
--- a/src/protocol/ast.rs
+++ b/src/protocol/ast.rs
@@ -7,8 +7,7 @@ use std::ops::{Index, IndexMut};
 
 use super::arena::{Arena, Id};
 use crate::collections::StringRef;
-use crate::protocol::inputsource::*;
-use crate::protocol::input_source2::{InputPosition2, InputSpan};
+use crate::protocol::input_source::InputSpan;
 
 /// Helper macro that defines a type alias for a AST element ID. In this case
 /// only used to alias the `Id` types.
@@ -60,7 +59,7 @@ macro_rules!
define_new_ast_id { pub struct $name (pub(crate) $parent); impl $name { - pub(crate) fn new_invalid() -> Self { Self($parent::new_invalid()) } + pub(crate) fn new_invalid() -> Self { Self(<$parent>::new_invalid()) } pub(crate) fn is_invalid(&self) -> bool { self.0.is_invalid() } pub fn upcast(self) -> $parent { self.0 } } @@ -357,7 +356,7 @@ impl Display for Identifier { } } -#[derive(Debug, Clone, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq)] pub enum ParserTypeVariant { // Basic builtin Message, @@ -429,11 +428,9 @@ pub enum ConcreteTypePart { // Builtin types without nested types Message, Bool, - Byte, - Short, - Int, - Long, - String, + UInt8, UInt16, UInt32, UInt64, + SInt8, SInt16, SInt32, SInt64, + Character, String, // Builtin types with one nested type Array, Slice, @@ -1016,12 +1013,6 @@ impl Statement { pub fn as_channel(&self) -> &ChannelStatement { self.as_local().as_channel() } - pub fn as_skip(&self) -> &SkipStatement { - match self { - Statement::Skip(result) => result, - _ => panic!("Unable to cast `Statement` to `SkipStatement`"), - } - } pub fn as_labeled(&self) -> &LabeledStatement { match self { Statement::Labeled(result) => result, @@ -1495,18 +1486,6 @@ impl Expression { _ => panic!("Unable to cast `Expression` to `SelectExpression`"), } } - pub fn as_array(&self) -> &ArrayExpression { - match self { - Expression::Array(result) => result, - _ => panic!("Unable to cast `Expression` to `ArrayExpression`"), - } - } - pub fn as_constant(&self) -> &LiteralExpression { - match self { - Expression::Literal(result) => result, - _ => panic!("Unable to cast `Expression` to `ConstantExpression`"), - } - } pub fn as_call(&self) -> &CallExpression { match self { Expression::Call(result) => result, @@ -1793,7 +1772,7 @@ pub struct CallExpression { pub concrete_type: ConcreteType, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Method { // Builtin Get, @@ -1875,7 +1854,7 @@ impl Literal { #[derive(Debug, Clone)] pub struct LiteralInteger { pub(crate) unsigned_value: u64, - pub(crate) negated: bool, // for constant expression evaluation, TODO + pub(crate) negated: bool, // for constant expression evaluation, TODO: @Int } #[derive(Debug, Clone)] diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index ddb1fa2942d742b89be6198f1962e30cee85809d..d991cbc9cae9a0e4bc20871e2b02ab6b41ddb734 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -2,6 +2,7 @@ use std::fmt::{Debug, Display, Write}; use std::io::Write as IOWrite; use super::ast::*; +use super::token_parsing::*; const INDENT: usize = 2; @@ -48,14 +49,13 @@ const PREFIX_UNARY_EXPR_ID: &'static str = "EUna"; const PREFIX_INDEXING_EXPR_ID: &'static str = "EIdx"; const PREFIX_SLICING_EXPR_ID: &'static str = "ESli"; const PREFIX_SELECT_EXPR_ID: &'static str = "ESel"; -const PREFIX_ARRAY_EXPR_ID: &'static str = "EArr"; -const PREFIX_CONST_EXPR_ID: &'static str = "ECns"; +const PREFIX_LITERAL_EXPR_ID: &'static str = "ELit"; const PREFIX_CALL_EXPR_ID: &'static str = "ECll"; const PREFIX_VARIABLE_EXPR_ID: &'static str = "EVar"; struct KV<'a> { buffer: &'a mut String, - prefix: Option<(&'static str, u32)>, + prefix: Option<(&'static str, i32)>, indent: usize, temp_key: &'a mut String, temp_val: &'a mut String, @@ -74,7 +74,7 @@ impl<'a> KV<'a> { } } - fn with_id(mut self, prefix: &'static str, id: u32) -> Self { + fn with_id(mut self, prefix: &'static str, id: i32) -> Self { self.prefix = Some((prefix, id)); self } @@ -104,8 +104,8 @@ 
impl<'a> KV<'a> { self } - fn with_ascii_val(self, val: &[u8]) -> Self { - self.temp_val.push_str(&*String::from_utf8_lossy(val)); + fn with_identifier_val(self, val: &Identifier) -> Self { + self.temp_val.push_str(val.value.as_str()); self } @@ -117,11 +117,11 @@ impl<'a> KV<'a> { self } - fn with_opt_ascii_val(self, val: Option<&[u8]>) -> Self { + fn with_opt_identifier_val(self, val: Option<&Identifier>) -> Self { match val { Some(v) => { self.temp_val.push_str("Some("); - self.temp_val.push_str(&*String::from_utf8_lossy(v)); + self.temp_val.push_str(v.value.as_str()); self.temp_val.push(')'); }, None => { @@ -224,7 +224,7 @@ impl ASTWriter { Pragma::Module(pragma) => { self.kv(indent).with_id(PREFIX_PRAGMA_ID, pragma.this.index) .with_s_key("PragmaModule") - .with_ascii_val(&pragma.value); + .with_identifier_val(&pragma.value); } } } @@ -238,8 +238,8 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_IMPORT_ID, import.this.index) .with_s_key("ImportModule"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module); - self.kv(indent2).with_s_key("Alias").with_ascii_val(&import.alias.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&import.module); + self.kv(indent2).with_s_key("Alias").with_identifier_val(&import.alias); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(import.module_id.as_ref().map(|v| &v.index)); }, @@ -247,7 +247,7 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_IMPORT_ID, import.this.index) .with_s_key("ImportSymbol"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&import.module); + self.kv(indent2).with_s_key("Name").with_identifier_val(&import.module); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(import.module_id.as_ref().map(|v| &v.index)); @@ -257,8 +257,8 @@ impl ASTWriter { let indent4 = indent3 + 1; for symbol in &import.symbols { self.kv(indent3).with_s_key("AliasedSymbol"); - self.kv(indent4).with_s_key("Name").with_ascii_val(&symbol.name.value); - self.kv(indent4).with_s_key("Alias").with_ascii_val(&symbol.alias.value); + self.kv(indent4).with_s_key("Name").with_identifier_val(&symbol.name); + self.kv(indent4).with_s_key("Alias").with_opt_identifier_val(symbol.alias.as_ref()); self.kv(indent4).with_s_key("Definition") .with_opt_disp_val(symbol.definition_id.as_ref().map(|v| &v.index)); } @@ -281,34 +281,34 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_STRUCT_ID, def.this.0.index) .with_s_key("DefinitionStruct"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&def.identifier); for poly_var_id in &def.poly_vars { - self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + self.kv(indent3).with_s_key("PolyVar").with_identifier_val(&poly_var_id); } self.kv(indent2).with_s_key("Fields"); for field in &def.fields { self.kv(indent3).with_s_key("Field"); self.kv(indent4).with_s_key("Name") - .with_ascii_val(&field.field.value); + .with_identifier_val(&field.field); self.kv(indent4).with_s_key("Type") - .with_custom_val(|s| write_parser_type(s, heap, &heap[field.parser_type])); + .with_custom_val(|s| write_parser_type(s, heap, &field.parser_type)); } }, Definition::Enum(def) => { self.kv(indent).with_id(PREFIX_ENUM_ID, def.this.0.index) .with_s_key("DefinitionEnum"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&def.identifier); for poly_var_id in &def.poly_vars { - 
self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + self.kv(indent3).with_s_key("PolyVar").with_identifier_val(&poly_var_id); } self.kv(indent2).with_s_key("Variants"); for variant in &def.variants { self.kv(indent3).with_s_key("Variant"); self.kv(indent4).with_s_key("Name") - .with_ascii_val(&variant.identifier.value); + .with_identifier_val(&variant.identifier); let variant_value = self.kv(indent4).with_s_key("Value"); match &variant.value { EnumVariantValue::None => variant_value.with_s_val("None"), @@ -320,16 +320,16 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_UNION_ID, def.this.0.index) .with_s_key("DefinitionUnion"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&def.identifier); for poly_var_id in &def.poly_vars { - self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + self.kv(indent3).with_s_key("PolyVar").with_identifier_val(&poly_var_id); } self.kv(indent2).with_s_key("Variants"); for variant in &def.variants { self.kv(indent3).with_s_key("Variant"); self.kv(indent4).with_s_key("Name") - .with_ascii_val(&variant.identifier.value); + .with_identifier_val(&variant.identifier); match &variant.value { UnionVariantValue::None => { @@ -339,7 +339,7 @@ impl ASTWriter { self.kv(indent4).with_s_key("Values"); for embedded in embedded { self.kv(indent4+1).with_s_key("Value") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*embedded])); + .with_custom_val(|v| write_parser_type(v, heap, embedded)); } } } @@ -349,12 +349,16 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_FUNCTION_ID, def.this.0.index) .with_s_key("DefinitionFunction"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&def.identifier); for poly_var_id in &def.poly_vars { - self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + self.kv(indent3).with_s_key("PolyVar").with_identifier_val(&poly_var_id); } - self.kv(indent2).with_s_key("ReturnParserType").with_custom_val(|s| write_parser_type(s, heap, &heap[def.return_type])); + self.kv(indent2).with_s_key("ReturnParserTypes"); + for return_type in &def.return_types { + self.kv(indent3).with_s_key("ReturnParserType") + .with_custom_val(|s| write_parser_type(s, heap, return_type)); + } self.kv(indent2).with_s_key("Parameters"); for param_id in &def.parameters { @@ -362,18 +366,18 @@ impl ASTWriter { } self.kv(indent2).with_s_key("Body"); - self.write_stmt(heap, def.body, indent3); + self.write_stmt(heap, def.body.upcast(), indent3); }, Definition::Component(def) => { self.kv(indent).with_id(PREFIX_COMPONENT_ID,def.this.0.index) .with_s_key("DefinitionComponent"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&def.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&def.identifier); self.kv(indent2).with_s_key("Variant").with_debug_val(&def.variant); self.kv(indent2).with_s_key("PolymorphicVariables"); for poly_var_id in &def.poly_vars { - self.kv(indent3).with_s_key("PolyVar").with_ascii_val(&poly_var_id.value); + self.kv(indent3).with_s_key("PolyVar").with_identifier_val(&poly_var_id); } self.kv(indent2).with_s_key("Parameters"); @@ -382,7 +386,7 @@ impl ASTWriter { } self.kv(indent2).with_s_key("Body"); - self.write_stmt(heap, def.body, indent3); + self.write_stmt(heap, def.body.upcast(), indent3); } } } @@ -393,21 +397,8 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_PARAMETER_ID, 
param_id.0.index) .with_s_key("Parameter"); - self.kv(indent2).with_s_key("Name").with_ascii_val(¶m.identifier.value); - self.kv(indent2).with_s_key("ParserType").with_custom_val(|w| write_parser_type(w, heap, &heap[param.parser_type])); - } - - fn write_poly_args(&mut self, heap: &Heap, poly_args: &[ParserTypeId], indent: usize) { - if poly_args.is_empty() { - return - } - - let indent2 = indent + 1; - self.kv(indent).with_s_key("PolymorphicArguments"); - for poly_arg in poly_args { - self.kv(indent2).with_s_key("Argument") - .with_custom_val(|v| write_parser_type(v, heap, &heap[*poly_arg])); - } + self.kv(indent2).with_s_key("Name").with_identifier_val(¶m.identifier); + self.kv(indent2).with_s_key("ParserType").with_custom_val(|w| write_parser_type(w, heap, ¶m.parser_type)); } fn write_stmt(&mut self, heap: &Heap, stmt_id: StatementId, indent: usize) { @@ -448,17 +439,11 @@ impl ASTWriter { } } }, - Statement::Skip(stmt) => { - self.kv(indent).with_id(PREFIX_SKIP_STMT_ID, stmt.this.0.index) - .with_s_key("Skip"); - self.kv(indent2).with_s_key("Next") - .with_opt_disp_val(stmt.next.as_ref().map(|v| &v.index)); - }, Statement::Labeled(stmt) => { self.kv(indent).with_id(PREFIX_LABELED_STMT_ID, stmt.this.0.index) .with_s_key("Labeled"); - self.kv(indent2).with_s_key("Label").with_ascii_val(&stmt.label.value); + self.kv(indent2).with_s_key("Label").with_identifier_val(&stmt.label); self.kv(indent2).with_s_key("Statement"); self.write_stmt(heap, stmt.body, indent3); }, @@ -473,10 +458,12 @@ impl ASTWriter { self.write_expr(heap, stmt.test, indent3); self.kv(indent2).with_s_key("TrueBody"); - self.write_stmt(heap, stmt.true_body, indent3); + self.write_stmt(heap, stmt.true_body.upcast(), indent3); - self.kv(indent2).with_s_key("FalseBody"); - self.write_stmt(heap, stmt.false_body, indent3); + if let Some(false_body) = stmt.false_body { + self.kv(indent2).with_s_key("FalseBody"); + self.write_stmt(heap, false_body.upcast(), indent3); + } }, Statement::EndIf(stmt) => { self.kv(indent).with_id(PREFIX_ENDIF_STMT_ID, stmt.this.0.index) @@ -496,7 +483,7 @@ impl ASTWriter { self.kv(indent2).with_s_key("Condition"); self.write_expr(heap, stmt.test, indent3); self.kv(indent2).with_s_key("Body"); - self.write_stmt(heap, stmt.body, indent3); + self.write_stmt(heap, stmt.body.upcast(), indent3); }, Statement::EndWhile(stmt) => { self.kv(indent).with_id(PREFIX_ENDWHILE_STMT_ID, stmt.this.0.index) @@ -509,7 +496,7 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_BREAK_STMT_ID, stmt.this.0.index) .with_s_key("Break"); self.kv(indent2).with_s_key("Label") - .with_opt_ascii_val(stmt.label.as_ref().map(|v| v.value.as_slice())); + .with_opt_identifier_val(stmt.label.as_ref()); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(stmt.target.as_ref().map(|v| &v.0.index)); }, @@ -517,7 +504,7 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_CONTINUE_STMT_ID, stmt.this.0.index) .with_s_key("Continue"); self.kv(indent2).with_s_key("Label") - .with_opt_ascii_val(stmt.label.as_ref().map(|v| v.value.as_slice())); + .with_opt_identifier_val(stmt.label.as_ref()); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(stmt.target.as_ref().map(|v| &v.0.index)); }, @@ -527,7 +514,7 @@ impl ASTWriter { self.kv(indent2).with_s_key("EndSync") .with_opt_disp_val(stmt.end_sync.as_ref().map(|v| &v.0.index)); self.kv(indent2).with_s_key("Body"); - self.write_stmt(heap, stmt.body, indent3); + self.write_stmt(heap, stmt.body.upcast(), indent3); }, Statement::EndSynchronous(stmt) => { 
self.kv(indent).with_id(PREFIX_ENDSYNC_STMT_ID, stmt.this.0.index) @@ -542,18 +529,10 @@ impl ASTWriter { self.kv(indent2).with_s_key("Expression"); self.write_expr(heap, stmt.expression, indent3); }, - Statement::Assert(stmt) => { - self.kv(indent).with_id(PREFIX_ASSERT_STMT_ID, stmt.this.0.index) - .with_s_key("Assert"); - self.kv(indent2).with_s_key("Expression"); - self.write_expr(heap, stmt.expression, indent3); - self.kv(indent2).with_s_key("Next") - .with_opt_disp_val(stmt.next.as_ref().map(|v| &v.index)); - }, Statement::Goto(stmt) => { self.kv(indent).with_id(PREFIX_GOTO_STMT_ID, stmt.this.0.index) .with_s_key("Goto"); - self.kv(indent2).with_s_key("Label").with_ascii_val(&stmt.label.value); + self.kv(indent2).with_s_key("Label").with_identifier_val(&stmt.label); self.kv(indent2).with_s_key("Target") .with_opt_disp_val(stmt.target.as_ref().map(|v| &v.0.index)); }, @@ -682,7 +661,7 @@ impl ASTWriter { self.kv(indent2).with_s_key("Field").with_s_val("length"); }, Field::Symbolic(field) => { - self.kv(indent2).with_s_key("Field").with_ascii_val(&field.identifier.value); + self.kv(indent2).with_s_key("Field").with_identifier_val(&field.identifier); self.kv(indent3).with_s_key("Definition").with_opt_disp_val(field.definition.as_ref().map(|v| &v.index)); self.kv(indent3).with_s_key("Index").with_disp_val(&field.field_idx); } @@ -692,42 +671,31 @@ impl ASTWriter { self.kv(indent2).with_s_key("ConcreteType") .with_custom_val(|v| write_concrete_type(v, heap, def_id, &expr.concrete_type)); }, - Expression::Array(expr) => { - self.kv(indent).with_id(PREFIX_ARRAY_EXPR_ID, expr.this.0.index) - .with_s_key("ArrayExpr"); - self.kv(indent2).with_s_key("Elements"); - for expr_id in &expr.elements { - self.write_expr(heap, *expr_id, indent3); - } - - self.kv(indent2).with_s_key("Parent") - .with_custom_val(|v| write_expression_parent(v, &expr.parent)); - self.kv(indent2).with_s_key("ConcreteType") - .with_custom_val(|v| write_concrete_type(v, heap, def_id, &expr.concrete_type)); - }, Expression::Literal(expr) => { - self.kv(indent).with_id(PREFIX_CONST_EXPR_ID, expr.this.0.index) - .with_s_key("ConstantExpr"); + self.kv(indent).with_id(PREFIX_LITERAL_EXPR_ID, expr.this.0.index) + .with_s_key("LiteralExpr"); let val = self.kv(indent2).with_s_key("Value"); match &expr.value { Literal::Null => { val.with_s_val("null"); }, Literal::True => { val.with_s_val("true"); }, Literal::False => { val.with_s_val("false"); }, - Literal::Character(data) => { val.with_ascii_val(data); }, - Literal::Integer(data) => { val.with_disp_val(data); }, + Literal::Character(data) => { val.with_disp_val(data); }, + Literal::String(data) => { val.with_disp_val(data.as_str()); }, + Literal::Integer(data) => { val.with_debug_val(data); }, Literal::Struct(data) => { val.with_s_val("Struct"); let indent4 = indent3 + 1; - self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("ParserType") + .with_custom_val(|t| write_parser_type(t, heap, &data.parser_type)); self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { write_option(s, data.definition.as_ref().map(|v| &v.index)); }); for field in &data.fields { self.kv(indent3).with_s_key("Field"); - self.kv(indent4).with_s_key("Name").with_ascii_val(&field.identifier.value); + self.kv(indent4).with_s_key("Name").with_identifier_val(&field.identifier); self.kv(indent4).with_s_key("Index").with_disp_val(&field.field_idx); self.kv(indent4).with_s_key("ParserType"); self.write_expr(heap, field.value, indent4 + 1); @@ -736,7 +704,8 @@ impl ASTWriter { 
Literal::Enum(data) => { val.with_s_val("Enum"); - self.write_poly_args(heap, &data.poly_args2, indent3); + self.kv(indent3).with_s_key("ParserType") + .with_custom_val(|t| write_parser_type(t, heap, &data.parser_type)); self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { write_option(s, data.definition.as_ref().map(|v| &v.index)) }); @@ -745,7 +714,9 @@ impl ASTWriter { Literal::Union(data) => { val.with_s_val("Union"); let indent4 = indent3 + 1; - self.write_poly_args(heap, &data.poly_args2, indent3); + + self.kv(indent3).with_s_key("ParserType") + .with_custom_val(|t| write_parser_type(t, heap, &data.parser_type)); self.kv(indent3).with_s_key("Definition").with_custom_val(|s| { write_option(s, data.definition.as_ref().map(|v| &v.index)); }); @@ -756,6 +727,15 @@ impl ASTWriter { self.write_expr(heap, *value, indent4); } } + Literal::Array(data) => { + val.with_s_val("Array"); + let indent4 = indent3 + 1; + + self.kv(indent3).with_s_key("Elements"); + for expr_id in data { + self.write_expr(heap, *expr_id, indent4); + } + } } self.kv(indent2).with_s_key("Parent") @@ -767,22 +747,21 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_CALL_EXPR_ID, expr.this.0.index) .with_s_key("CallExpr"); - // Method - let method = self.kv(indent2).with_s_key("Method"); - match &expr.method { - Method::Get => { method.with_s_val("get"); }, - Method::Put => { method.with_s_val("put"); }, - Method::Fires => { method.with_s_val("fires"); }, - Method::Create => { method.with_s_val("create"); }, - Method::Symbolic(symbolic) => { - method.with_s_val("symbolic"); - self.kv(indent3).with_s_key("Name").with_ascii_val(&symbolic.identifier.value); - self.kv(indent3).with_s_key("Definition") - .with_opt_disp_val(symbolic.definition.as_ref().map(|v| &v.index)); - } + let definition = &heap[expr.definition]; + match definition { + Definition::Component(definition) => { + self.kv(indent2).with_s_key("BuiltIn").with_disp_val(&false); + self.kv(indent2).with_s_key("Variant").with_debug_val(&definition.variant); + }, + Definition::Function(definition) => { + self.kv(indent2).with_s_key("BuiltIn").with_disp_val(&definition.builtin); + self.kv(indent2).with_s_key("Variant").with_s_val("Function"); + }, + _ => unreachable!() } - - self.write_poly_args(heap, &expr.poly_args, indent2); + self.kv(indent2).with_s_key("MethodName").with_identifier_val(definition.identifier()); + self.kv(indent2).with_s_key("ParserType") + .with_custom_val(|t| write_parser_type(t, heap, &expr.parser_type)); // Arguments self.kv(indent2).with_s_key("Arguments"); @@ -799,7 +778,7 @@ impl ASTWriter { Expression::Variable(expr) => { self.kv(indent).with_id(PREFIX_VARIABLE_EXPR_ID, expr.this.0.index) .with_s_key("VariableExpr"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&expr.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&expr.identifier); self.kv(indent2).with_s_key("Definition") .with_opt_disp_val(expr.declaration.as_ref().map(|v| &v.index)); self.kv(indent2).with_s_key("Parent") @@ -817,9 +796,9 @@ impl ASTWriter { self.kv(indent).with_id(PREFIX_LOCAL_ID, local_id.0.index) .with_s_key("Local"); - self.kv(indent2).with_s_key("Name").with_ascii_val(&local.identifier.value); + self.kv(indent2).with_s_key("Name").with_identifier_val(&local.identifier); self.kv(indent2).with_s_key("ParserType") - .with_custom_val(|w| write_parser_type(w, heap, &heap[local.parser_type])); + .with_custom_val(|w| write_parser_type(w, heap, &local.parser_type)); } 
//-------------------------------------------------------------------------- @@ -847,45 +826,71 @@ fn write_option(target: &mut String, value: Option) { fn write_parser_type(target: &mut String, heap: &Heap, t: &ParserType) { use ParserTypeVariant as PTV; - let mut embedded = Vec::new(); - match &t.variant { - PTV::Input(id) => { target.push_str("in"); embedded.push(*id); } - PTV::Output(id) => { target.push_str("out"); embedded.push(*id) } - PTV::Array(id) => { target.push_str("array"); embedded.push(*id) } - PTV::Message => { target.push_str("msg"); } - PTV::Bool => { target.push_str("bool"); } - PTV::Byte => { target.push_str("byte"); } - PTV::Short => { target.push_str("short"); } - PTV::Int => { target.push_str("int"); } - PTV::Long => { target.push_str("long"); } - PTV::String => { target.push_str("str"); } - PTV::IntegerLiteral => { target.push_str("int_lit"); } - PTV::Inferred => { target.push_str("auto"); } - PTV::Symbolic(symbolic) => { - target.push_str(&String::from_utf8_lossy(&symbolic.identifier.value)); - match symbolic.variant { - Some(SymbolicParserTypeVariant::PolyArg(def_id, idx)) => { - target.push_str(&format!("{{def: {}, idx: {}}}", def_id.index, idx)); - }, - Some(SymbolicParserTypeVariant::Definition(def_id)) => { - target.push_str(&format!("{{def: {}}}", def_id.index)); - }, - None => { - target.push_str("{None}"); + fn push_bytes(target: &mut String, msg: &[u8]) { + target.push_str(&String::from_utf8_lossy(msg)); + } + + fn write_element(target: &mut String, heap: &Heap, t: &ParserType, mut element_idx: usize) -> usize { + let element = &t.elements[element_idx]; + match &element.variant { + PTV::Message => { push_bytes(target, KW_TYPE_MESSAGE); }, + PTV::Bool => { push_bytes(target, KW_TYPE_BOOL); }, + PTV::UInt8 => { push_bytes(target, KW_TYPE_UINT8); }, + PTV::UInt16 => { push_bytes(target, KW_TYPE_UINT16); }, + PTV::UInt32 => { push_bytes(target, KW_TYPE_UINT32); }, + PTV::UInt64 => { push_bytes(target, KW_TYPE_UINT64); }, + PTV::SInt8 => { push_bytes(target, KW_TYPE_SINT8); }, + PTV::SInt16 => { push_bytes(target, KW_TYPE_SINT16); }, + PTV::SInt32 => { push_bytes(target, KW_TYPE_SINT32); }, + PTV::SInt64 => { push_bytes(target, KW_TYPE_SINT64); }, + PTV::Character => { push_bytes(target, KW_TYPE_CHAR); }, + PTV::String => { push_bytes(target, KW_TYPE_STRING); }, + PTV::IntegerLiteral => { target.push_str("int_literal"); }, + PTV::Inferred => { push_bytes(target, KW_TYPE_INFERRED); }, + PTV::Array => { + element_idx = write_element(target, heap, t, element_idx + 1); + target.push_str("[]"); + }, + PTV::Input => { + push_bytes(target, KW_TYPE_IN_PORT); + target.push('<'); + element_idx = write_element(target, heap, t, element_idx + 1); + target.push('>'); + }, + PTV::Output => { + push_bytes(target, KW_TYPE_OUT_PORT); + target.push('<'); + element_idx = write_element(target, heap, t, element_idx + 1); + target.push('>'); + }, + PTV::PolymorphicArgument(definition_id, arg_idx) => { + let definition = &heap[*definition_id]; + let poly_var = &definition.poly_vars()[*arg_idx].value; + target.write_str(poly_var.as_str()); + }, + PTV::Definition(definition_id, num_embedded) => { + let definition = &heap[*definition_id]; + let definition_ident = definition.identifier().value.as_str(); + target.write_str(definition_ident); + + let num_embedded = *num_embedded; + if num_embedded != 0 { + target.push('<'); + for embedded_idx in 0..num_embedded { + if embedded_idx != 0 { + target.push(','); + } + element_idx = write_element(target, heap, t, element_idx + 1); + } + 
target.push('>'); } } - embedded.extend(&symbolic.poly_args2); } - }; - if !embedded.is_empty() { - target.push_str("<"); - for (idx, embedded_id) in embedded.into_iter().enumerate() { - if idx != 0 { target.push_str(", "); } - write_parser_type(target, heap, &heap[embedded_id]); - } - target.push_str(">"); + element_idx } + + write_element(target, heap, t, 0); } fn write_concrete_type(target: &mut String, heap: &Heap, def_id: DefinitionId, t: &ConcreteType) { @@ -900,12 +905,8 @@ fn write_concrete_type(target: &mut String, heap: &Heap, def_id: DefinitionId, t CTP::Marker(marker) => { // Marker points to polymorphic variable index let definition = &heap[def_id]; - let poly_var_ident = match definition { - Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => unreachable!(), - Definition::Function(definition) => &definition.poly_vars[*marker].value, - Definition::Component(definition) => &definition.poly_vars[*marker].value, - }; - target.push_str(&String::from_utf8_lossy(&poly_var_ident)); + let poly_var_ident = &definition.poly_vars()[*marker]; + target.push_str(poly_var_ident.value.as_str()); idx = write_concrete_part(target, heap, def_id, t, idx + 1); }, CTP::Void => target.push_str("void"), @@ -936,7 +937,7 @@ fn write_concrete_type(target: &mut String, heap: &Heap, def_id: DefinitionId, t }, CTP::Instance(definition_id, num_embedded) => { let identifier = heap[*definition_id].identifier(); - target.push_str(&String::from_utf8_lossy(&identifier.value)); + target.push_str(identifier.value.as_str()); target.push('<'); for idx_embedded in 0..*num_embedded { if idx_embedded != 0 { diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index 5412525d59afbe05d47e0cb0ea8e6c8dbd0bb414..ea55369d378de1166cef1b73822aeba19ef47355 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -75,21 +75,25 @@ impl Value { Literal::False => Value::Boolean(BooleanValue(false)), Literal::Integer(val) => { // Convert raw ASCII data to UTF-8 string - let val = *val; - if val >= BYTE_MIN && val <= BYTE_MAX { - Value::Byte(ByteValue(val as i8)) - } else if val >= SHORT_MIN && val <= SHORT_MAX { - Value::Short(ShortValue(val as i16)) - } else if val >= INT_MIN && val <= INT_MAX { - Value::Int(IntValue(val as i32)) + let mut integer_value = val.unsigned_value as i64; // TODO: @Int + if val.negated { integer_value = -integer_value; }; + + if integer_value >= BYTE_MIN && integer_value <= BYTE_MAX { + Value::Byte(ByteValue(integer_value as i8)) + } else if integer_value >= SHORT_MIN && integer_value <= SHORT_MAX { + Value::Short(ShortValue(integer_value as i16)) + } else if integer_value >= INT_MIN && integer_value <= INT_MAX { + Value::Int(IntValue(integer_value as i32)) } else { - Value::Long(LongValue(val)) + Value::Long(LongValue(integer_value)) } } Literal::Character(_data) => unimplemented!(), + Literal::String(_data) => unimplemented!(), Literal::Struct(_data) => unimplemented!(), Literal::Enum(_data) => unimplemented!(), Literal::Union(_data) => unimplemented!(), + Literal::Array(expressions) => unimplemented!(), } } fn set(&mut self, index: &Value, value: &Value) -> Option { @@ -913,7 +917,7 @@ impl ValueImpl for InputValue { fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; match &t.variant { - Input(_) | Inferred | Symbolic(_) => true, + Input | Inferred | Definition(_, _) => true, _ => false, } } @@ -934,8 +938,8 @@ impl ValueImpl for OutputValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - 
match &t.variant { - Output(_) | Inferred | Symbolic(_) => true, + match &t.elements[0].variant { + Output | Inferred | Definition(_, _) => true, _ => false, } } @@ -966,8 +970,8 @@ impl ValueImpl for MessageValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match &t.variant { - Message | Inferred | Symbolic(_) => true, + match &t.elements[0].variant { + Message | Inferred | Definition(_, _) => true, _ => false, } } @@ -988,8 +992,10 @@ impl ValueImpl for BooleanValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match t.variant { - Symbolic(_) | Inferred | Bool | Byte | Short | Int | Long => true, + match t.elements[0].variant { + Definition(_, _) | Inferred | Bool | + UInt8 | UInt16 | UInt32 | UInt64 | + SInt8 | SInt16 | SInt32 | SInt64 => true, _ => false } } @@ -1010,8 +1016,10 @@ impl ValueImpl for ByteValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match t.variant { - Symbolic(_) | Inferred | Byte | Short | Int | Long => true, + match t.elements[0].variant { + Definition(_, _) | Inferred | + UInt8 | UInt16 | UInt32 | UInt64 | + SInt8 | SInt16 | SInt32 | SInt64 => true, _ => false } } @@ -1032,8 +1040,10 @@ impl ValueImpl for ShortValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match t.variant { - Symbolic(_) | Inferred | Short | Int | Long => true, + match t.elements[0].variant { + Definition(_, _) | Inferred | + UInt16 | UInt32 | UInt64 | + SInt16 | SInt32 | SInt64=> true, _ => false } } @@ -1054,8 +1064,10 @@ impl ValueImpl for IntValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match t.variant { - Symbolic(_) | Inferred | Int | Long => true, + match t.elements[0].variant { + Definition(_, _) | Inferred | + UInt32 | UInt64 | + SInt32 | SInt64 => true, _ => false } } @@ -1076,17 +1088,18 @@ impl ValueImpl for LongValue { } fn is_type_compatible_hack(_h: &Heap, t: &ParserType) -> bool { use ParserTypeVariant::*; - match &t.variant { - Long | Inferred | Symbolic(_) => true, + match &t.elements[0].variant { + UInt64 | SInt64 | Inferred | Definition(_, _) => true, _ => false, } } } -fn get_array_inner(t: &ParserType) -> Option { - match t.variant { - ParserTypeVariant::Array(inner) => Some(inner), - _ => None +fn get_array_inner(t: &ParserType) -> Option { + if t.elements[0].variant == ParserTypeVariant::Array { + return Some(t.elements[1].variant.clone()) + } else { + return None; } } @@ -1333,10 +1346,10 @@ impl Store { fn initialize(&mut self, h: &Heap, var: VariableId, value: Value) { // Ensure value is compatible with type of variable let parser_type = match &h[var] { - Variable::Local(v) => v.parser_type, - Variable::Parameter(v) => v.parser_type, + Variable::Local(v) => &v.parser_type, + Variable::Parameter(v) => &v.parser_type, }; - assert!(value.is_type_compatible(h, &h[parser_type])); + assert!(value.is_type_compatible(h, parser_type)); // Overwrite mapping self.map.insert(var, value.clone()); } @@ -1351,11 +1364,10 @@ impl Store { Expression::Variable(var) => { let var = var.declaration.unwrap(); // Ensure value is compatible with type of variable - let parser_type_id = match &h[var] { - Variable::Local(v) => v.parser_type, - Variable::Parameter(v) => v.parser_type + let parser_type = match &h[var] { + Variable::Local(v) => &v.parser_type, + Variable::Parameter(v) => &v.parser_type }; - let parser_type = 
&h[parser_type_id]; assert!(value.is_type_compatible(h, parser_type)); // Overwrite mapping self.map.insert(var, value.clone()); @@ -1388,7 +1400,7 @@ impl Store { let value = self .map .get(&var_id) - .expect(&format!("Uninitialized variable {:?}", String::from_utf8_lossy(&var.identifier.value))); + .expect(&format!("Uninitialized variable {:?}", var.identifier.value.as_str())); Ok(value.clone()) } Expression::Indexing(indexing) => { @@ -1516,13 +1528,6 @@ impl Store { Expression::Indexing(expr) => self.get(h, ctx, expr.this.upcast()), Expression::Slicing(_expr) => unimplemented!(), Expression::Select(expr) => self.get(h, ctx, expr.this.upcast()), - Expression::Array(expr) => { - let mut elements = Vec::new(); - for &elem in expr.elements.iter() { - elements.push(self.eval(h, ctx, elem)?); - } - todo!() - } Expression::Literal(expr) => Ok(Value::from_constant(&expr.value)), Expression::Call(expr) => match &expr.method { Method::Get => { @@ -1587,7 +1592,7 @@ pub(crate) struct Prompt { impl Prompt { pub fn new(h: &Heap, def: DefinitionId, args: &Vec) -> Self { let mut prompt = - Prompt { definition: def, store: Store::new(), position: Some((&h[def]).body()) }; + Prompt { definition: def, store: Store::new(), position: Some((&h[def]).body().upcast()) }; prompt.set_arguments(h, args); prompt } @@ -1597,8 +1602,7 @@ impl Prompt { assert_eq!(params.len(), args.len()); for (param, value) in params.iter().zip(args.iter()) { let hparam = &h[*param]; - let parser_type = &h[hparam.parser_type]; - assert!(value.is_type_compatible(h, parser_type)); + assert!(value.is_type_compatible(h, &hparam.parser_type)); self.store.initialize(h, param.upcast(), value.clone()); } } @@ -1646,9 +1650,12 @@ impl Prompt { let value = self.store.eval(h, ctx, stmt.test)?; // Continue with either branch if value.as_boolean().0 { - self.position = Some(stmt.true_body); + self.position = Some(stmt.true_body.upcast()); + } else if let Some(false_body) = stmt.false_body { + self.position = Some(false_body.upcast()); } else { - self.position = Some(stmt.false_body); + // No false body + self.position = Some(stmt.end_if.unwrap().upcast()); } Err(EvalContinuation::Stepping) } @@ -1662,7 +1669,7 @@ impl Prompt { let value = self.store.eval(h, ctx, stmt.test)?; // Either continue with body, or go to next if value.as_boolean().0 { - self.position = Some(stmt.body); + self.position = Some(stmt.body.upcast()); } else { self.position = stmt.end_while.map(|x| x.upcast()); } @@ -1675,7 +1682,7 @@ impl Prompt { } Statement::Synchronous(stmt) => { // Continue to next statement, and signal upward - self.position = Some(stmt.body); + self.position = Some(stmt.body.upcast()); Err(EvalContinuation::SyncBlockStart) } Statement::EndSynchronous(stmt) => { diff --git a/src/protocol/input_source2.rs b/src/protocol/input_source.rs similarity index 87% rename from src/protocol/input_source2.rs rename to src/protocol/input_source.rs index ca14cb91c4ca6c00e130029c16f926d451862731..3de5ce2533f3319f82c4716cfb7d53d7080a7c4f 100644 --- a/src/protocol/input_source2.rs +++ b/src/protocol/input_source.rs @@ -3,33 +3,33 @@ use std::cell::{Ref, RefCell}; use std::fmt::Write; #[derive(Debug, Clone, Copy)] -pub struct InputPosition2 { +pub struct InputPosition { pub line: u32, pub offset: u32, } -impl InputPosition2 { +impl InputPosition { pub(crate) fn with_offset(&self, offset: u32) -> Self { - InputPosition2{ line: self.line, offset: self.offset + offset } + InputPosition { line: self.line, offset: self.offset + offset } } } #[derive(Debug, Clone, Copy)] pub 
struct InputSpan { - pub begin: InputPosition2, - pub end: InputPosition2, + pub begin: InputPosition, + pub end: InputPosition, } impl InputSpan { #[inline] - pub fn from_positions(begin: InputPosition2, end: InputPosition2) -> Self { + pub fn from_positions(begin: InputPosition, end: InputPosition) -> Self { Self { begin, end } } } /// Wrapper around source file with optional filename. Ensures that the file is /// only scanned once. -pub struct InputSource2 { +pub struct InputSource { pub(crate) filename: String, pub(crate) input: Vec, // Iteration @@ -43,7 +43,7 @@ pub struct InputSource2 { offset_lookup: RefCell>, } -impl InputSource2 { +impl InputSource { pub fn new(filename: String, input: Vec) -> Self { Self{ filename, @@ -62,8 +62,8 @@ impl InputSource2 { } #[inline] - pub fn pos(&self) -> InputPosition2 { - InputPosition2{ line: self.line, offset: self.offset as u32 } + pub fn pos(&self) -> InputPosition { + InputPosition { line: self.line, offset: self.offset as u32 } } pub fn next(&self) -> Option { @@ -84,7 +84,7 @@ impl InputSource2 { } #[inline] - pub fn section_at_pos(&self, start: InputPosition2, end: InputPosition2) -> &[u8] { + pub fn section_at_pos(&self, start: InputPosition, end: InputPosition) -> &[u8] { &self.input[start.offset as usize..end.offset as usize] } @@ -125,7 +125,7 @@ impl InputSource2 { fn set_error(&mut self, msg: &str) { if self.had_error.is_none() { - self.had_error = Some(ParseError::new_error(self, self.pos(), msg)); + self.had_error = Some(ParseError::new_error_str_at_pos(self, self.pos(), msg)); } } @@ -211,7 +211,7 @@ pub struct ParseErrorStatement { } impl ParseErrorStatement { - fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self { + fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self { // Seek line start and end let line_start = source.lookup_line_start_offset(position.line); let line_end = source.lookup_line_end_offset(position.line); @@ -232,7 +232,7 @@ impl ParseErrorStatement { } } - fn from_source_at_span(statement_kind: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self { + fn from_source_at_span(statement_kind: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self { debug_assert!(span.end.line >= span.begin.line); debug_assert!(span.end.offset >= span.begin.offset); @@ -264,7 +264,7 @@ impl ParseErrorStatement { } /// Produces context from source - fn create_context(source: &InputSource2, start: usize, end: usize) -> String { + fn create_context(source: &InputSource, start: usize, end: usize) -> String { let context_raw = &source.input[start..end]; String::from_utf8_lossy(context_raw).to_string() } @@ -336,8 +336,8 @@ impl fmt::Display for ParseErrorStatement { f.write_str(&context)?; annotation.push_str(" | "); - extend_annotation(1, self.start_column, &self.source, &mut annotation, ' '); - extend_annotation(self.start_column, self.end_column, &self.source, &mut annotation, '~'); + extend_annotation(1, self.start_column, &self.context, &mut annotation, ' '); + extend_annotation(self.start_column, self.end_column, &self.context, &mut annotation, '~'); annotation.push('\n'); f.write_str(&annotation)?; @@ -348,14 +348,14 @@ impl fmt::Display for ParseErrorStatement { let mut lines = self.context.lines(); let first_line = lines.next().unwrap(); transform_context(first_line, &mut context); - writeln!(" |- {}", &context)?; + writeln!(f, " |- 
{}", &context)?; // - remaining lines let mut last_line = first_line; while let Some(cur_line) = lines.next() { context.clear(); transform_context(cur_line, &mut context); - writeln!(" | {}", &context); + writeln!(f, " | {}", &context); last_line = cur_line; } @@ -397,53 +397,53 @@ impl ParseError { Self{ statements: Vec::new() } } - pub fn new_error_at_pos(source: &InputSource2, position: InputPosition2, message: String) -> Self { + pub fn new_error_at_pos(source: &InputSource, position: InputPosition, message: String) -> Self { Self{ statements: vec!(ParseErrorStatement::from_source_at_pos( StatementKind::Error, source, position, message )) } } - pub fn new_error_str_at_pos(source: &InputSource2, position: InputPosition2, message: &str) -> Self { + pub fn new_error_str_at_pos(source: &InputSource, position: InputPosition, message: &str) -> Self { Self{ statements: vec!(ParseErrorStatement::from_source_at_pos( StatementKind::Error, source, position, message.to_string() )) } } - pub fn new_error_at_span(source: &InputSource2, span: InputSpan, message: String) -> Self { + pub fn new_error_at_span(source: &InputSource, span: InputSpan, message: String) -> Self { Self{ statements: vec!(ParseErrorStatement::from_source_at_span( StatementKind::Error, source, span, message )) } } - pub fn new_error_str_at_span(source: &InputSource2, span: InputSpan, message: &str) -> Self { + pub fn new_error_str_at_span(source: &InputSource, span: InputSpan, message: &str) -> Self { Self{ statements: vec!(ParseErrorStatement::from_source_at_span( StatementKind::Error, source, span, message.to_string() )) } } - pub fn with_at_pos(mut self, error_type: StatementKind, source: &InputSource2, position: InputPosition2, message: String) -> Self { + pub fn with_at_pos(mut self, error_type: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self { self.statements.push(ParseErrorStatement::from_source_at_pos(error_type, source, position, message)); self } - pub fn with_at_span(mut self, error_type: StatementKind, source: &InputSource2, span: InputSpan, message: String) -> Self { + pub fn with_at_span(mut self, error_type: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self { self.statements.push(ParseErrorStatement::from_source_at_span(error_type, source, span, message.to_string())); self } - pub fn with_info_at_pos(self, source: &InputSource2, position: InputPosition2, msg: String) -> Self { + pub fn with_info_at_pos(self, source: &InputSource, position: InputPosition, msg: String) -> Self { self.with_at_pos(StatementKind::Info, source, position, msg) } - pub fn with_info_str_at_pos(self, source: &InputSource2, position: InputPosition2, msg: &str) -> Self { + pub fn with_info_str_at_pos(self, source: &InputSource, position: InputPosition, msg: &str) -> Self { self.with_at_pos(StatementKind::Info, source, position, msg.to_string()) } - pub fn with_info_at_span(self, source: &InputSource2, span: InputSpan, msg: String) -> Self { + pub fn with_info_at_span(self, source: &InputSource, span: InputSpan, msg: String) -> Self { self.with_at_span(StatementKind::Info, source, span, msg) } - pub fn with_info_str_at_span(self, source: &InputSource2, span: InputSpan, msg: &str) -> Self { + pub fn with_info_str_at_span(self, source: &InputSource, span: InputSpan, msg: &str) -> Self { self.with_at_span(StatementKind::Info, source, span, msg.to_string()) } } diff --git a/src/protocol/inputsource.rs b/src/protocol/inputsource.rs deleted file mode 100644 index 
2a12406576d403fccc1a83a713ac1af31e32dd04..0000000000000000000000000000000000000000 --- a/src/protocol/inputsource.rs +++ /dev/null @@ -1,305 +0,0 @@ -use std::fmt; -use std::fs::File; -use std::io; -use std::path::Path; - -use backtrace::Backtrace; - -#[derive(Debug, Clone)] -pub struct InputSource { - pub(crate) filename: String, - pub(crate) input: Vec, - line: usize, - column: usize, - offset: usize, -} - -static STD_LIB_PDL: &'static [u8] = b" -primitive forward(in i, out o) { - while(true) synchronous put(o, get(i)); -} -primitive sync(in i, out o) { - while(true) synchronous if(fires(i)) put(o, get(i)); -} -primitive alternator(in i, out l, out r) { - while(true) { - synchronous if(fires(i)) put(l, get(i)); - synchronous if(fires(i)) put(r, get(i)); - } -} -primitive replicator(in i, out l, out r) { - while(true) synchronous { - if(fires(i)) { - msg m = get(i); - put(l, m); - put(r, m); - } - } -} -primitive merger(in l, in r, out o) { - while(true) synchronous { - if(fires(l)) put(o, get(l)); - else if(fires(r)) put(o, get(r)); - } -} -"; - -impl InputSource { - // Constructors - pub fn new(filename: S, reader: &mut R) -> io::Result { - let mut vec = Vec::new(); - reader.read_to_end(&mut vec)?; - vec.extend(STD_LIB_PDL.to_vec()); - Ok(InputSource { - filename: filename.to_string(), - input: vec, - line: 1, - column: 1, - offset: 0, - }) - } - // Constructor helpers - pub fn from_file(path: &Path) -> io::Result { - let filename = path.file_name(); - match filename { - Some(filename) => { - let mut f = File::open(path)?; - InputSource::new(filename.to_string_lossy(), &mut f) - } - None => Err(io::Error::new(io::ErrorKind::NotFound, "Invalid path")), - } - } - pub fn from_string(string: &str) -> io::Result { - let buffer = Box::new(string); - let mut bytes = buffer.as_bytes(); - InputSource::new(String::new(), &mut bytes) - } - pub fn from_buffer(buffer: &[u8]) -> io::Result { - InputSource::new(String::new(), &mut Box::new(buffer)) - } - // Internal methods - pub fn pos(&self) -> InputPosition { - InputPosition { line: self.line, column: self.column, offset: self.offset } - } - pub fn seek(&mut self, pos: InputPosition) { - debug_assert!(pos.offset < self.input.len()); - self.line = pos.line; - self.column = pos.column; - self.offset = pos.offset; - } - pub fn is_eof(&self) -> bool { - self.next() == None - } - - pub fn next(&self) -> Option { - if self.offset < self.input.len() { - Some(self.input[self.offset]) - } else { - None - } - } - - pub fn lookahead(&self, pos: usize) -> Option { - let offset_pos = self.offset + pos; - if offset_pos < self.input.len() { - Some(self.input[offset_pos]) - } else { - None - } - } - - pub fn has(&self, to_compare: &[u8]) -> bool { - if self.offset + to_compare.len() <= self.input.len() { - for idx in 0..to_compare.len() { - if to_compare[idx] != self.input[self.offset + idx] { - return false; - } - } - - true - } else { - false - } - } - - pub fn consume(&mut self) { - match self.next() { - Some(x) if x == b'\r' && self.lookahead(1) != Some(b'\n') || x == b'\n' => { - self.line += 1; - self.offset += 1; - self.column = 1; - } - Some(_) => { - self.offset += 1; - self.column += 1; - } - None => {} - } - } -} - -impl fmt::Display for InputSource { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.pos().fmt(f) - } -} - -#[derive(Debug, Clone, Copy)] -pub struct InputPosition { - pub line: usize, - pub column: usize, - pub offset: usize, -} - -impl InputPosition { - fn context<'a>(&self, source: &'a InputSource) -> &'a [u8] { - let 
start = self.offset - (self.column - 1); - let mut end = self.offset; - while end < source.input.len() { - let cur = (*source.input)[end]; - if cur == b'\n' || cur == b'\r' { - break; - } - end += 1; - } - &source.input[start..end] - } -} - -impl Default for InputPosition { - fn default() -> Self { - Self{ line: 1, column: 1, offset: 0 } - } -} - -impl fmt::Display for InputPosition { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}", self.line, self.column) - } -} - -#[derive(Debug)] -pub enum ParseErrorType { - Info, - Error -} - -#[derive(Debug)] -pub struct ParseErrorStatement { - pub(crate) error_type: ParseErrorType, - pub(crate) position: InputPosition, - pub(crate) filename: String, - pub(crate) context: String, - pub(crate) message: String, -} - -impl ParseErrorStatement { - fn from_source(error_type: ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { - // Seek line start and end - let line_start = position.offset - (position.column - 1); - let mut line_end = position.offset; - while line_end < source.input.len() && source.input[line_end] != b'\n' { - line_end += 1; - } - - // Compensate for '\r\n' - if line_end > line_start && source.input[line_end - 1] == b'\r' { - line_end -= 1; - } - - Self{ - error_type, - position, - filename: source.filename.clone(), - context: String::from_utf8_lossy(&source.input[line_start..line_end]).to_string(), - message: msg.to_string() - } - } -} - -impl fmt::Display for ParseErrorStatement { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // Write message - match self.error_type { - ParseErrorType::Info => write!(f, " INFO: ")?, - ParseErrorType::Error => write!(f, "ERROR: ")?, - } - writeln!(f, "{}", &self.message)?; - - // Write originating file/line/column - if self.filename.is_empty() { - writeln!(f, " +- at {}:{}", self.position.line, self.position.column)?; - } else { - writeln!(f, " +- at {}:{}:{}", self.filename, self.position.line, self.position.column)?; - } - - // Write source context - writeln!(f, " | ")?; - writeln!(f, " | {}", self.context)?; - - // Write underline indicating where the error ocurred - debug_assert!(self.position.column <= self.context.chars().count()); - let mut arrow = String::with_capacity(self.context.len() + 3); - arrow.push_str(" | "); - let mut char_col = 1; - for char in self.context.chars() { - if char_col == self.position.column { break; } - if char == '\t' { - arrow.push('\t'); - } else { - arrow.push(' '); - } - - char_col += 1; - } - arrow.push('^'); - writeln!(f, "{}", arrow)?; - - Ok(()) - } -} - -#[derive(Debug)] -pub struct ParseError { - pub(crate) statements: Vec -} - -impl fmt::Display for ParseError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.statements.is_empty() { - return Ok(()) - } - - self.statements[0].fmt(f)?; - for statement in self.statements.iter().skip(1) { - writeln!(f)?; - statement.fmt(f)?; - } - - Ok(()) - } -} - -impl ParseError { - pub fn empty() -> Self { - Self{ statements: Vec::new() } - } - - pub fn new_error(source: &InputSource, position: InputPosition, msg: &str) -> Self { - Self{ statements: vec!(ParseErrorStatement::from_source(ParseErrorType::Error, source, position, msg))} - } - - pub fn with_prefixed(mut self, error_type: ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { - self.statements.insert(0, ParseErrorStatement::from_source(error_type, source, position, msg)); - self - } - - pub fn with_postfixed(mut self, error_type: 
ParseErrorType, source: &InputSource, position: InputPosition, msg: &str) -> Self { - self.statements.push(ParseErrorStatement::from_source(error_type, source, position, msg)); - self - } - - pub fn with_postfixed_info(self, source: &InputSource, position: InputPosition, msg: &str) -> Self { - self.with_postfixed(ParseErrorType::Info, source, position, msg) - } -} diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs deleted file mode 100644 index 4b8af9794cce90972f97907da61a9c9749bb52ca..0000000000000000000000000000000000000000 --- a/src/protocol/lexer.rs +++ /dev/null @@ -1,2670 +0,0 @@ -use crate::protocol::ast::*; -use crate::protocol::inputsource::*; - -const MAX_LEVEL: usize = 128; -const MAX_NAMESPACES: u8 = 8; // only three levels are supported at the moment - -macro_rules! debug_log { - ($format:literal) => { - enabled_debug_print!(true, "lexer", $format); - }; - ($format:literal, $($args:expr),*) => { - enabled_debug_print!(true, "lexer", $format, $($args),*); - }; -} - -macro_rules! debug_line { - ($source:expr) => { - { - let mut buffer = String::with_capacity(128); - for idx in 0..buffer.capacity() { - let next = $source.lookahead(idx); - if next.is_none() || Some(b'\n') == next { break; } - buffer.push(next.unwrap() as char); - } - buffer - } - }; -} -fn is_vchar(x: Option) -> bool { - if let Some(c) = x { - c >= 0x21 && c <= 0x7E - } else { - false - } -} - -fn is_wsp(x: Option) -> bool { - if let Some(c) = x { - c == b' ' || c == b'\t' - } else { - false - } -} - -fn is_ident_start(x: Option) -> bool { - if let Some(c) = x { - c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z' - } else { - false - } -} - -fn is_ident_rest(x: Option) -> bool { - if let Some(c) = x { - c >= b'A' && c <= b'Z' || c >= b'a' && c <= b'z' || c >= b'0' && c <= b'9' || c == b'_' - } else { - false - } -} - -fn is_constant(x: Option) -> bool { - if let Some(c) = x { - c >= b'0' && c <= b'9' || c == b'\'' - } else { - false - } -} - -fn is_integer_start(x: Option) -> bool { - if let Some(c) = x { - c >= b'0' && c <= b'9' - } else { - false - } -} - -fn is_integer_rest(x: Option) -> bool { - if let Some(c) = x { - c >= b'0' && c <= b'9' - || c >= b'a' && c <= b'f' - || c >= b'A' && c <= b'F' - || c == b'x' - || c == b'o' - } else { - false - } -} - -fn lowercase(x: u8) -> u8 { - if x >= b'A' && x <= b'Z' { - x - b'A' + b'a' - } else { - x - } -} - -fn identifier_as_namespaced(identifier: Identifier) -> NamespacedIdentifier { - let identifier_len = identifier.value.len(); - debug_assert!(identifier_len < u16::max_value() as usize); - NamespacedIdentifier{ - position: identifier.position, - value: identifier.value, - poly_args: Vec::new(), - parts: vec![ - NamespacedIdentifierPart::Identifier{start: 0, end: identifier_len as u16} - ], - } -} - -pub struct Lexer<'a> { - source: &'a mut InputSource, - level: usize, -} - -impl Lexer<'_> { - pub fn new(source: &mut InputSource) -> Lexer { - Lexer { source, level: 0 } - } - fn error_at_pos(&self, msg: &str) -> ParseError { - ParseError::new_error(self.source, self.source.pos(), msg) - } - fn consume_line(&mut self) -> Result, ParseError> { - let mut result: Vec = Vec::new(); - let mut next = self.source.next(); - while next.is_some() && next != Some(b'\n') && next != Some(b'\r') { - if !(is_vchar(next) || is_wsp(next)) { - return Err(self.error_at_pos("Expected visible character or whitespace")); - } - result.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - } - if next.is_some() { - self.source.consume(); - } - if next == 
Some(b'\r') && self.source.next() == Some(b'\n') { - self.source.consume(); - } - Ok(result) - } - fn consume_whitespace(&mut self, expected: bool) -> Result<(), ParseError> { - let mut found = false; - let mut next = self.source.next(); - while next.is_some() { - if next == Some(b' ') - || next == Some(b'\t') - || next == Some(b'\r') - || next == Some(b'\n') - { - self.source.consume(); - next = self.source.next(); - found = true; - continue; - } - if next == Some(b'/') { - next = self.source.lookahead(1); - if next == Some(b'/') { - self.source.consume(); // slash - self.source.consume(); // slash - self.consume_line()?; - next = self.source.next(); - found = true; - continue; - } - if next == Some(b'*') { - self.source.consume(); // slash - self.source.consume(); // star - next = self.source.next(); - while next.is_some() { - if next == Some(b'*') { - next = self.source.lookahead(1); - if next == Some(b'/') { - self.source.consume(); // star - self.source.consume(); // slash - break; - } - } - self.source.consume(); - next = self.source.next(); - } - next = self.source.next(); - found = true; - continue; - } - } - break; - } - if expected && !found { - Err(self.error_at_pos("Expected whitespace")) - } else { - Ok(()) - } - } - fn consume_any_chars(&mut self) { - if !is_ident_start(self.source.next()) { return } - self.source.consume(); - while is_ident_rest(self.source.next()) { - self.source.consume() - } - } - fn has_keyword(&self, keyword: &[u8]) -> bool { - if !self.source.has(keyword) { - return false; - } - - // Word boundary - let next = self.source.lookahead(keyword.len()); - if next.is_none() { return true; } - return !is_ident_rest(next); - } - fn consume_keyword(&mut self, keyword: &[u8]) -> Result<(), ParseError> { - let len = keyword.len(); - for i in 0..len { - let expected = Some(lowercase(keyword[i])); - let next = self.source.next(); - if next != expected { - return Err(self.error_at_pos(&format!("Expected keyword '{}'", String::from_utf8_lossy(keyword)))); - } - self.source.consume(); - } - if let Some(next) = self.source.next() { - if next >= b'A' && next <= b'Z' || next >= b'a' && next <= b'z' || next >= b'0' && next <= b'9' { - return Err(self.error_at_pos(&format!("Expected word boundary after '{}'", String::from_utf8_lossy(keyword)))); - } - } - Ok(()) - } - fn has_string(&self, string: &[u8]) -> bool { - self.source.has(string) - } - fn consume_string(&mut self, string: &[u8]) -> Result<(), ParseError> { - let len = string.len(); - for i in 0..len { - let expected = Some(string[i]); - let next = self.source.next(); - if next != expected { - return Err(self.error_at_pos(&format!("Expected {}", String::from_utf8_lossy(string)))); - } - self.source.consume(); - } - Ok(()) - } - /// Generic comma-separated consumer. If opening delimiter is not found then - /// `Ok(None)` will be returned. Otherwise will consume the comma separated - /// values, allowing a trailing comma. If no comma is found and the closing - /// delimiter is not found, then a parse error with `expected_end_msg` is - /// returned. 
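The `has_keyword`/`consume_keyword` pair removed above only accepts a keyword when it is followed by a word boundary, so `if` never matches the prefix of `iffy`. A minimal standalone sketch of that rule (not this crate's API; names are illustrative):

fn is_ident_rest_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// True if `input` starts with `keyword` followed by a word boundary
/// (end of input, or a byte that cannot continue an identifier).
fn starts_with_keyword(input: &[u8], keyword: &[u8]) -> bool {
    input.starts_with(keyword)
        && input.get(keyword.len()).map_or(true, |&b| !is_ident_rest_byte(b))
}

fn main() {
    assert!(starts_with_keyword(b"if (x)", b"if"));
    assert!(!starts_with_keyword(b"iffy", b"if")); // no boundary after "if"
    assert!(starts_with_keyword(b"if", b"if"));    // end of input counts as a boundary
    println!("keyword boundary checks passed");
}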
- fn consume_comma_separated( - &mut self, h: &mut Heap, open: u8, close: u8, expected_end_msg: &str, func: F - ) -> Result>, ParseError> - where F: Fn(&mut Lexer, &mut Heap) -> Result - { - if Some(open) != self.source.next() { - return Ok(None) - } - - self.source.consume(); - self.consume_whitespace(false)?; - let mut elements = Vec::new(); - let mut had_comma = true; - - loop { - if Some(close) == self.source.next() { - self.source.consume(); - break; - } else if !had_comma { - return Err(ParseError::new_error( - &self.source, self.source.pos(), expected_end_msg - )); - } - - elements.push(func(self, h)?); - self.consume_whitespace(false)?; - - had_comma = self.source.next() == Some(b','); - if had_comma { - self.source.consume(); - self.consume_whitespace(false)?; - } - } - - Ok(Some(elements)) - } - /// Essentially the same as `consume_comma_separated`, but will not allocate - /// memory. Will return `Ok(true)` and leave the input position at the end - /// the comma-separated list if well formed and `Ok(false)` if the list is - /// not present. Otherwise returns `Err(())` and leaves the input position - /// at a "random" position. - fn consume_comma_separated_spilled_without_pos_recovery bool>( - &mut self, open: u8, close: u8, func: F - ) -> Result { - if Some(open) != self.source.next() { - return Ok(false); - } - - self.source.consume(); - if self.consume_whitespace(false).is_err() { return Err(()) }; - let mut had_comma = true; - loop { - if Some(close) == self.source.next() { - self.source.consume(); - return Ok(true); - } else if !had_comma { - return Err(()); - } - - if !func(self) { return Err(()); } - if self.consume_whitespace(false).is_err() { return Err(()) }; - - had_comma = self.source.next() == Some(b','); - if had_comma { - self.source.consume(); - if self.consume_whitespace(false).is_err() { return Err(()); } - } - } - } - fn consume_ident(&mut self) -> Result, ParseError> { - if !self.has_identifier() { - return Err(self.error_at_pos("Expected identifier")); - } - let mut result = Vec::new(); - let mut next = self.source.next(); - result.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - while is_ident_rest(next) { - result.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - } - Ok(result) - } - fn has_integer(&mut self) -> bool { - is_integer_start(self.source.next()) - } - fn consume_integer(&mut self) -> Result { - let position = self.source.pos(); - let mut data = Vec::new(); - let mut next = self.source.next(); - while is_integer_rest(next) { - data.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - } - - let data_len = data.len(); - debug_assert_ne!(data_len, 0); - if data_len == 1 { - debug_assert!(data[0] >= b'0' && data[0] <= b'9'); - return Ok((data[0] - b'0') as i64); - } else { - // TODO: Fix, u64 should be supported as well - let parsed = if data[1] == b'b' { - let data = String::from_utf8_lossy(&data[2..]); - i64::from_str_radix(&data, 2) - } else if data[1] == b'o' { - let data = String::from_utf8_lossy(&data[2..]); - i64::from_str_radix(&data, 8) - } else if data[1] == b'x' { - let data = String::from_utf8_lossy(&data[2..]); - i64::from_str_radix(&data, 16) - } else { - // Assume decimal - let data = String::from_utf8_lossy(&data); - i64::from_str_radix(&data, 10) - }; - - if let Err(_err) = parsed { - return Err(ParseError::new_error(&self.source, position, "Invalid integer constant")); - } - - Ok(parsed.unwrap()) - } - } - - // Statement keywords - // TODO: Clean up these 
functions - fn has_statement_keyword(&self) -> bool { - self.has_keyword(b"channel") - || self.has_keyword(b"skip") - || self.has_keyword(b"if") - || self.has_keyword(b"while") - || self.has_keyword(b"break") - || self.has_keyword(b"continue") - || self.has_keyword(b"synchronous") - || self.has_keyword(b"return") - || self.has_keyword(b"assert") - || self.has_keyword(b"goto") - || self.has_keyword(b"new") - } - fn has_type_keyword(&self) -> bool { - self.has_keyword(b"in") - || self.has_keyword(b"out") - || self.has_keyword(b"msg") - || self.has_keyword(b"boolean") - || self.has_keyword(b"byte") - || self.has_keyword(b"short") - || self.has_keyword(b"int") - || self.has_keyword(b"long") - || self.has_keyword(b"auto") - } - fn has_builtin_keyword(&self) -> bool { - self.has_keyword(b"get") - || self.has_keyword(b"fires") - || self.has_keyword(b"create") - || self.has_keyword(b"length") - } - fn has_reserved(&self) -> bool { - self.has_statement_keyword() - || self.has_type_keyword() - || self.has_builtin_keyword() - || self.has_keyword(b"let") - || self.has_keyword(b"struct") - || self.has_keyword(b"enum") - || self.has_keyword(b"true") - || self.has_keyword(b"false") - || self.has_keyword(b"null") - } - - // Identifiers - - fn has_identifier(&self) -> bool { - if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { - return false; - } - let next = self.source.next(); - is_ident_start(next) - } - fn consume_identifier(&mut self) -> Result { - if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { - return Err(self.error_at_pos("Expected identifier")); - } - let position = self.source.pos(); - let value = self.consume_ident()?; - Ok(Identifier{ position, value }) - } - fn consume_identifier_spilled(&mut self) -> Result<(), ParseError> { - if self.has_statement_keyword() || self.has_type_keyword() || self.has_builtin_keyword() { - return Err(self.error_at_pos("Expected identifier")); - } - self.consume_ident()?; - Ok(()) - } - - fn consume_namespaced_identifier(&mut self, h: &mut Heap) -> Result { - if self.has_reserved() { - return Err(self.error_at_pos("Encountered reserved keyword")); - } - - // Consumes a part of the namespaced identifier, returns a boolean - // indicating whether polymorphic arguments were specified. - // TODO: Continue here: if we fail to properly parse the polymorphic - // arguments, assume we have reached the end of the namespaced - // identifier and are instead dealing with a less-than operator. Ugly? - // Yes. Needs tokenizer? Yes. - fn consume_part( - l: &mut Lexer, h: &mut Heap, ident: &mut NamespacedIdentifier, - backup_pos: &mut InputPosition - ) -> Result<(), ParseError> { - // Consume identifier - if !ident.value.is_empty() { - ident.value.extend(b"::"); - } - let ident_start = ident.value.len(); - ident.value.extend(l.consume_ident()?); - ident.parts.push(NamespacedIdentifierPart::Identifier{ - start: ident_start as u16, - end: ident.value.len() as u16 - }); - - // Maybe consume polymorphic args. - *backup_pos = l.source.pos(); - l.consume_whitespace(false)?; - match l.consume_polymorphic_args(h, true)? 
{ - Some(args) => { - let poly_start = ident.poly_args.len(); - ident.poly_args.extend(args); - - ident.parts.push(NamespacedIdentifierPart::PolyArgs{ - start: poly_start as u16, - end: ident.poly_args.len() as u16, - }); - - *backup_pos = l.source.pos(); - }, - None => {} - }; - - Ok(()) - } - - let mut ident = NamespacedIdentifier{ - position: self.source.pos(), - value: Vec::new(), - poly_args: Vec::new(), - parts: Vec::new(), - }; - - // Keep consume parts separted by "::". We don't consume the trailing - // whitespace, hence we keep a backup position at the end of the last - // valid part of the namespaced identifier (i.e. the last ident, or the - // last encountered polymorphic arguments). - let mut backup_pos = self.source.pos(); - consume_part(self, h, &mut ident, &mut backup_pos)?; - self.consume_whitespace(false)?; - while self.has_string(b"::") { - self.consume_string(b"::")?; - self.consume_whitespace(false)?; - consume_part(self, h, &mut ident, &mut backup_pos)?; - self.consume_whitespace(false)?; - } - - self.source.seek(backup_pos); - Ok(ident) - } - - // Consumes a spilled namespaced identifier and returns the number of - // namespaces that we encountered. - fn consume_namespaced_identifier_spilled(&mut self) -> Result { - if self.has_reserved() { - return Err(self.error_at_pos("Encountered reserved keyword")); - } - - debug_log!("consume_nsident2_spilled: {}", debug_line!(self.source)); - - fn consume_part_spilled(l: &mut Lexer, backup_pos: &mut InputPosition) -> Result<(), ParseError> { - l.consume_ident()?; - *backup_pos = l.source.pos(); - l.consume_whitespace(false)?; - match l.maybe_consume_poly_args_spilled_without_pos_recovery() { - Ok(true) => { *backup_pos = l.source.pos(); }, - Ok(false) => {}, - Err(_) => { return Err(l.error_at_pos("Failed to parse poly args (spilled)")) }, - } - Ok(()) - } - - let mut backup_pos = self.source.pos(); - let mut num_namespaces = 1; - consume_part_spilled(self, &mut backup_pos)?; - self.consume_whitespace(false)?; - while self.has_string(b"::") { - self.consume_string(b"::")?; - self.consume_whitespace(false)?; - consume_part_spilled(self, &mut backup_pos)?; - self.consume_whitespace(false)?; - num_namespaces += 1; - } - - self.source.seek(backup_pos); - Ok(num_namespaces) - } - - // Types and type annotations - - /// Consumes a type definition. When called the input position should be at - /// the type specification. When done the input position will be at the end - /// of the type specifications (hence may be at whitespace). - fn consume_type(&mut self, h: &mut Heap, allow_inference: bool) -> Result { - // Small helper function to convert in/out polymorphic arguments. 
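The removed lexer leans heavily on the save-position-then-seek pattern visible above (`let backup_pos = self.source.pos(); … self.source.seek(backup_pos)`) to back out of speculative parses. A simplified, hypothetical cursor showing the same idea; `Cursor` and `speculate` are not names from this crate:

/// A hypothetical byte cursor, standing in for the removed InputSource.
struct Cursor<'a> {
    input: &'a [u8],
    offset: usize,
}

impl<'a> Cursor<'a> {
    fn pos(&self) -> usize { self.offset }
    fn seek(&mut self, pos: usize) { self.offset = pos; }
    fn consume(&mut self) { self.offset += 1; }

    /// Run `f`; if it fails, restore the cursor to where it started.
    fn speculate<T, E>(&mut self, f: impl FnOnce(&mut Self) -> Result<T, E>) -> Result<T, E> {
        let backup = self.pos();
        let result = f(self);
        if result.is_err() {
            self.seek(backup);
        }
        result
    }
}

fn main() {
    let mut c = Cursor { input: b"abc", offset: 0 };
    // A speculative parse that fails: the position is rolled back to the backup.
    let _ = c.speculate(|c| -> Result<(), ()> {
        c.consume();
        c.consume();
        Err(())
    });
    assert_eq!(c.pos(), 0);
}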
Not - // pretty, but return boolean is true if the error is due to inference - // not being allowed - let reduce_port_poly_args = | - heap: &mut Heap, - port_pos: &InputPosition, - args: Vec, - | -> Result { - match args.len() { - 0 => if allow_inference { - Ok(heap.alloc_parser_type(|this| ParserType{ - this, - pos: port_pos.clone(), - variant: ParserTypeVariant::Inferred - })) - } else { - Err(true) - }, - 1 => Ok(args[0]), - _ => Err(false) - } - }; - - // Consume the type - debug_log!("consume_type: {}", debug_line!(self.source)); - let pos = self.source.pos(); - let parser_type_variant = if self.has_keyword(b"msg") { - self.consume_keyword(b"msg")?; - ParserTypeVariant::Message - } else if self.has_keyword(b"boolean") { - self.consume_keyword(b"boolean")?; - ParserTypeVariant::Bool - } else if self.has_keyword(b"byte") { - self.consume_keyword(b"byte")?; - ParserTypeVariant::Byte - } else if self.has_keyword(b"short") { - self.consume_keyword(b"short")?; - ParserTypeVariant::Short - } else if self.has_keyword(b"int") { - self.consume_keyword(b"int")?; - ParserTypeVariant::Int - } else if self.has_keyword(b"long") { - self.consume_keyword(b"long")?; - ParserTypeVariant::Long - } else if self.has_keyword(b"str") { - self.consume_keyword(b"str")?; - ParserTypeVariant::String - } else if self.has_keyword(b"auto") { - if !allow_inference { - return Err(ParseError::new_error( - &self.source, pos, - "Type inference is not allowed here" - )); - } - - self.consume_keyword(b"auto")?; - ParserTypeVariant::Inferred - } else if self.has_keyword(b"in") { - // TODO: @cleanup: not particularly neat to have this special case - // where we enforce polyargs in the parser-phase - self.consume_keyword(b"in")?; - let poly_args = self.consume_polymorphic_args(h, allow_inference)?.unwrap_or_default(); - let poly_arg = reduce_port_poly_args(h, &pos, poly_args) - .map_err(|infer_error| { - let msg = if infer_error { - "Type inference is not allowed here" - } else { - "Type 'in' only allows for 1 polymorphic argument" - }; - ParseError::new_error(&self.source, pos, msg) - })?; - ParserTypeVariant::Input(poly_arg) - } else if self.has_keyword(b"out") { - self.consume_keyword(b"out")?; - let poly_args = self.consume_polymorphic_args(h, allow_inference)?.unwrap_or_default(); - let poly_arg = reduce_port_poly_args(h, &pos, poly_args) - .map_err(|infer_error| { - let msg = if infer_error { - "Type inference is not allowed here" - } else { - "Type 'out' only allows for 1 polymorphic argument, but {} were specified" - }; - ParseError::new_error(&self.source, pos, msg) - })?; - ParserTypeVariant::Output(poly_arg) - } else { - // Must be a symbolic type - let identifier = self.consume_namespaced_identifier(h)?; - ParserTypeVariant::Symbolic(SymbolicParserType{identifier, variant: None, poly_args2: Vec::new()}) - }; - - // If the type was a basic type (not supporting polymorphic type - // arguments), then we make sure the user did not specify any of them. 
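The 0-or-1 polymorphic-argument rule for `in`/`out` above (`reduce_port_poly_args`) boils down to: no argument means an inferred payload type (only when inference is allowed), one argument is used as-is, and anything more is an error. A standalone sketch of that rule with hypothetical types:

#[derive(Debug, PartialEq)]
enum PortPayload {
    Inferred,
    Explicit(&'static str), // stand-in for a ParserTypeId
}

/// Mirrors the shape of the removed helper: Err(true) means "inference needed
/// but not allowed", Err(false) means "too many polymorphic arguments".
fn reduce_port_poly_args(args: &[&'static str], allow_inference: bool) -> Result<PortPayload, bool> {
    match args {
        [] if allow_inference => Ok(PortPayload::Inferred),
        [] => Err(true),
        [single] => Ok(PortPayload::Explicit(*single)),
        _ => Err(false),
    }
}

fn main() {
    assert_eq!(reduce_port_poly_args(&[], true), Ok(PortPayload::Inferred));
    assert_eq!(reduce_port_poly_args(&[], false), Err(true));
    assert_eq!(reduce_port_poly_args(&["msg"], false), Ok(PortPayload::Explicit("msg")));
    assert_eq!(reduce_port_poly_args(&["msg", "int"], true), Err(false));
}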
- let mut backup_pos = self.source.pos(); - if !parser_type_variant.supports_polymorphic_args() { - self.consume_whitespace(false)?; - if let Some(b'<') = self.source.next() { - return Err(ParseError::new_error( - &self.source, self.source.pos(), - "This type does not allow polymorphic arguments" - )); - } - - self.source.seek(backup_pos); - } - - let mut parser_type_id = h.alloc_parser_type(|this| ParserType{ - this, pos, variant: parser_type_variant - }); - - // If we're dealing with arrays, then we need to wrap the currently - // parsed type in array types - self.consume_whitespace(false)?; - while let Some(b'[') = self.source.next() { - let pos = self.source.pos(); - self.source.consume(); - self.consume_whitespace(false)?; - if let Some(b']') = self.source.next() { - // Type is wrapped in an array - self.source.consume(); - parser_type_id = h.alloc_parser_type(|this| ParserType{ - this, pos, variant: ParserTypeVariant::Array(parser_type_id) - }); - backup_pos = self.source.pos(); - - // In case we're dealing with another array - self.consume_whitespace(false)?; - } else { - return Err(ParseError::new_error( - &self.source, pos, - "Expected a closing ']'" - )); - } - } - - self.source.seek(backup_pos); - Ok(parser_type_id) - } - - /// Attempts to consume a type without returning it. If it doesn't encounter - /// a well-formed type, then the input position is left at a "random" - /// position. - fn maybe_consume_type_spilled_without_pos_recovery(&mut self) -> bool { - // Consume type identifier - debug_log!("maybe_consume_type_spilled_...: {}", debug_line!(self.source)); - if self.has_type_keyword() { - self.consume_any_chars(); - } else { - let ident = self.consume_namespaced_identifier_spilled(); - if ident.is_err() { return false; } - } - - // Consume any polymorphic arguments that follow the type identifier - let mut backup_pos = self.source.pos(); - if self.consume_whitespace(false).is_err() { return false; } - - // Consume any array specifiers. Make sure we always leave the input - // position at the end of the last array specifier if we do find a - // valid type - if self.consume_whitespace(false).is_err() { return false; } - while let Some(b'[') = self.source.next() { - self.source.consume(); - if self.consume_whitespace(false).is_err() { return false; } - if self.source.next() != Some(b']') { return false; } - self.source.consume(); - backup_pos = self.source.pos(); - if self.consume_whitespace(false).is_err() { return false; } - } - - self.source.seek(backup_pos); - return true; - } - - fn maybe_consume_type_spilled(&mut self) -> bool { - let backup_pos = self.source.pos(); - if !self.maybe_consume_type_spilled_without_pos_recovery() { - self.source.seek(backup_pos); - return false; - } - - return true; - } - - /// Attempts to consume polymorphic arguments without returning them. If it - /// doesn't encounter well-formed polymorphic arguments, then the input - /// position is left at a "random" position. Returns a boolean indicating if - /// the poly_args list was present. - fn maybe_consume_poly_args_spilled_without_pos_recovery(&mut self) -> Result { - debug_log!("maybe_consume_poly_args_spilled_...: {}", debug_line!(self.source)); - self.consume_comma_separated_spilled_without_pos_recovery( - b'<', b'>', |lexer| { - lexer.maybe_consume_type_spilled_without_pos_recovery() - }) - } - - /// Consumes polymorphic arguments and its delimiters if specified. If - /// polyargs are present then the args are consumed and the input position - /// will be placed after the polyarg list. 
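The `[` / `]` loop above wraps the parsed element type once per trailing `[]` pair, so `int[][]` becomes Array(Array(int)), and the input position is left just after the last `]`. A minimal sketch of that wrapping using a hypothetical recursive type instead of heap-allocated ParserType nodes:

#[derive(Debug, PartialEq)]
enum Ty {
    Named(String),
    Array(Box<Ty>),
}

fn skip_spaces(mut s: &[u8]) -> &[u8] {
    while let [b' ', tail @ ..] = s {
        s = tail;
    }
    s
}

/// Consume trailing "[]" pairs (spaces allowed in between) and wrap `base`
/// once per pair; the returned slice starts right after the last ']'.
fn wrap_array_suffixes(mut base: Ty, mut rest: &[u8]) -> (Ty, &[u8]) {
    loop {
        let trimmed = skip_spaces(rest);
        if let [b'[', after @ ..] = trimmed {
            if let [b']', tail @ ..] = skip_spaces(after) {
                base = Ty::Array(Box::new(base));
                rest = tail;
                continue;
            }
        }
        return (base, rest);
    }
}

fn main() {
    let (ty, rest) = wrap_array_suffixes(Ty::Named("int".into()), b"[][] x");
    assert_eq!(ty, Ty::Array(Box::new(Ty::Array(Box::new(Ty::Named("int".into()))))));
    assert_eq!(rest, &b" x"[..]); // position kept after the last ']', like the backup_pos above
}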
If polyargs are not present then - /// the input position will remain unmodified and an empty vector will be - /// returned. - /// - /// Polymorphic arguments represent the specification of the parametric - /// types of a polymorphic type: they specify the value of the polymorphic - /// type's polymorphic variables. - fn consume_polymorphic_args(&mut self, h: &mut Heap, allow_inference: bool) -> Result>, ParseError> { - self.consume_comma_separated( - h, b'<', b'>', "Expected the end of the polymorphic argument list", - |lexer, heap| lexer.consume_type(heap, allow_inference) - ) - } - - /// Consumes polymorphic variables. These are identifiers that are used - /// within polymorphic types. The input position may be at whitespace. If - /// polymorphic variables are present then the whitespace, wrapping - /// delimiters and the polymorphic variables are consumed. Otherwise the - /// input position will stay where it is. If no polymorphic variables are - /// present then an empty vector will be returned. - fn consume_polymorphic_vars(&mut self, h: &mut Heap) -> Result, ParseError> { - let backup_pos = self.source.pos(); - match self.consume_comma_separated( - h, b'<', b'>', "Expected the end of the polymorphic variable list", - |lexer, _heap| lexer.consume_identifier() - )? { - Some(poly_vars) => Ok(poly_vars), - None => { - self.source.seek(backup_pos); - Ok(vec!()) - } - } - } - - // Parameters - - fn consume_parameter(&mut self, h: &mut Heap) -> Result { - let parser_type = self.consume_type(h, false)?; - self.consume_whitespace(true)?; - let position = self.source.pos(); - let identifier = self.consume_identifier()?; - let id = - h.alloc_parameter(|this| Parameter { this, position, parser_type, identifier }); - Ok(id) - } - fn consume_parameters(&mut self, h: &mut Heap) -> Result, ParseError> { - match self.consume_comma_separated( - h, b'(', b')', "Expected the end of the parameter list", - |lexer, heap| lexer.consume_parameter(heap) - )? 
{ - Some(params) => Ok(params), - None => { - Err(ParseError::new_error( - &self.source, self.source.pos(), - "Expected a parameter list" - )) - } - } - } - - // ==================== - // Expressions - // ==================== - - fn consume_paren_expression(&mut self, h: &mut Heap) -> Result { - self.consume_string(b"(")?; - self.consume_whitespace(false)?; - let result = self.consume_expression(h)?; - self.consume_whitespace(false)?; - self.consume_string(b")")?; - Ok(result) - } - fn consume_expression(&mut self, h: &mut Heap) -> Result { - if self.level >= MAX_LEVEL { - return Err(self.error_at_pos("Too deeply nested expression")); - } - self.level += 1; - let result = self.consume_assignment_expression(h); - self.level -= 1; - result - } - fn consume_assignment_expression(&mut self, h: &mut Heap) -> Result { - let result = self.consume_conditional_expression(h)?; - self.consume_whitespace(false)?; - if self.has_assignment_operator() { - let position = self.source.pos(); - let left = result; - let operation = self.consume_assignment_operator()?; - self.consume_whitespace(false)?; - let right = self.consume_expression(h)?; - Ok(h.alloc_assignment_expression(|this| AssignmentExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast()) - } else { - Ok(result) - } - } - fn has_assignment_operator(&self) -> bool { - self.has_string(b"=") - || self.has_string(b"*=") - || self.has_string(b"/=") - || self.has_string(b"%=") - || self.has_string(b"+=") - || self.has_string(b"-=") - || self.has_string(b"<<=") - || self.has_string(b">>=") - || self.has_string(b"&=") - || self.has_string(b"^=") - || self.has_string(b"|=") - } - fn consume_assignment_operator(&mut self) -> Result { - if self.has_string(b"=") { - self.consume_string(b"=")?; - Ok(AssignmentOperator::Set) - } else if self.has_string(b"*=") { - self.consume_string(b"*=")?; - Ok(AssignmentOperator::Multiplied) - } else if self.has_string(b"/=") { - self.consume_string(b"/=")?; - Ok(AssignmentOperator::Divided) - } else if self.has_string(b"%=") { - self.consume_string(b"%=")?; - Ok(AssignmentOperator::Remained) - } else if self.has_string(b"+=") { - self.consume_string(b"+=")?; - Ok(AssignmentOperator::Added) - } else if self.has_string(b"-=") { - self.consume_string(b"-=")?; - Ok(AssignmentOperator::Subtracted) - } else if self.has_string(b"<<=") { - self.consume_string(b"<<=")?; - Ok(AssignmentOperator::ShiftedLeft) - } else if self.has_string(b">>=") { - self.consume_string(b">>=")?; - Ok(AssignmentOperator::ShiftedRight) - } else if self.has_string(b"&=") { - self.consume_string(b"&=")?; - Ok(AssignmentOperator::BitwiseAnded) - } else if self.has_string(b"^=") { - self.consume_string(b"^=")?; - Ok(AssignmentOperator::BitwiseXored) - } else if self.has_string(b"|=") { - self.consume_string(b"|=")?; - Ok(AssignmentOperator::BitwiseOred) - } else { - Err(self.error_at_pos("Expected assignment operator")) - } - } - fn consume_conditional_expression(&mut self, h: &mut Heap) -> Result { - let result = self.consume_concat_expression(h)?; - self.consume_whitespace(false)?; - if self.has_string(b"?") { - let position = self.source.pos(); - let test = result; - self.consume_string(b"?")?; - self.consume_whitespace(false)?; - let true_expression = self.consume_expression(h)?; - self.consume_whitespace(false)?; - self.consume_string(b":")?; - self.consume_whitespace(false)?; - let false_expression = self.consume_expression(h)?; - 
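`consume_expression` above guards its recursion with a level counter against MAX_LEVEL, so pathological inputs like "((((…" fail with a parse error instead of overflowing the stack. A self-contained sketch of the same guard around a toy parenthesized-expression grammar (names are illustrative, not the crate's):

const MAX_LEVEL: usize = 128;

struct MiniParser<'a> {
    input: &'a [u8],
    offset: usize,
    level: usize,
}

impl<'a> MiniParser<'a> {
    fn new(input: &'a [u8]) -> Self {
        Self { input, offset: 0, level: 0 }
    }

    /// expr := '(' expr ')' | 'x', with a depth guard around the recursion.
    fn expr(&mut self) -> Result<(), String> {
        if self.level >= MAX_LEVEL {
            return Err("too deeply nested expression".to_string());
        }
        self.level += 1;
        let result = self.expr_impl();
        self.level -= 1;
        result
    }

    fn expr_impl(&mut self) -> Result<(), String> {
        match self.input.get(self.offset) {
            Some(b'(') => {
                self.offset += 1;
                self.expr()?;
                if self.input.get(self.offset) == Some(&b')') {
                    self.offset += 1;
                    Ok(())
                } else {
                    Err("expected ')'".to_string())
                }
            }
            Some(b'x') => {
                self.offset += 1;
                Ok(())
            }
            _ => Err("expected '(' or 'x'".to_string()),
        }
    }
}

fn main() {
    assert!(MiniParser::new(b"((x))").expr().is_ok());
    // 1000 nesting levels are rejected by the guard rather than blowing the stack.
    let deep = format!("{}x{}", "(".repeat(1000), ")".repeat(1000));
    assert!(MiniParser::new(deep.as_bytes()).expr().is_err());
}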
Ok(h.alloc_conditional_expression(|this| ConditionalExpression { - this, - position, - test, - true_expression, - false_expression, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast()) - } else { - Ok(result) - } - } - fn consume_concat_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_lor_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"@") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"@")?; - let operation = BinaryOperator::Concatenate; - self.consume_whitespace(false)?; - let right = self.consume_lor_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_lor_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_land_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"||") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"||")?; - let operation = BinaryOperator::LogicalOr; - self.consume_whitespace(false)?; - let right = self.consume_land_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_land_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_bor_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"&&") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"&&")?; - let operation = BinaryOperator::LogicalAnd; - self.consume_whitespace(false)?; - let right = self.consume_bor_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_bor_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_xor_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"|") && !self.has_string(b"||") && !self.has_string(b"|=") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"|")?; - let operation = BinaryOperator::BitwiseOr; - self.consume_whitespace(false)?; - let right = self.consume_xor_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_xor_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_band_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"^") && !self.has_string(b"^=") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"^")?; - let operation = BinaryOperator::BitwiseXor; - self.consume_whitespace(false)?; - let right = self.consume_band_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - 
right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_band_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_eq_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"&") && !self.has_string(b"&&") && !self.has_string(b"&=") { - let position = self.source.pos(); - let left = result; - self.consume_string(b"&")?; - let operation = BinaryOperator::BitwiseAnd; - self.consume_whitespace(false)?; - let right = self.consume_eq_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_eq_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_rel_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"==") || self.has_string(b"!=") { - let position = self.source.pos(); - let left = result; - let operation; - if self.has_string(b"==") { - self.consume_string(b"==")?; - operation = BinaryOperator::Equality; - } else { - self.consume_string(b"!=")?; - operation = BinaryOperator::Inequality; - } - self.consume_whitespace(false)?; - let right = self.consume_rel_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_rel_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_shift_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"<=") - || self.has_string(b">=") - || self.has_string(b"<") && !self.has_string(b"<<=") - || self.has_string(b">") && !self.has_string(b">>=") - { - let position = self.source.pos(); - let left = result; - let operation; - if self.has_string(b"<=") { - self.consume_string(b"<=")?; - operation = BinaryOperator::LessThanEqual; - } else if self.has_string(b">=") { - self.consume_string(b">=")?; - operation = BinaryOperator::GreaterThanEqual; - } else if self.has_string(b"<") { - self.consume_string(b"<")?; - operation = BinaryOperator::LessThan; - } else { - self.consume_string(b">")?; - operation = BinaryOperator::GreaterThan; - } - self.consume_whitespace(false)?; - let right = self.consume_shift_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_shift_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_add_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"<<") && !self.has_string(b"<<=") - || self.has_string(b">>") && !self.has_string(b">>=") - { - let position = self.source.pos(); - let left = result; - let operation; - if self.has_string(b"<<") { - self.consume_string(b"<<")?; - operation = BinaryOperator::ShiftLeft; - } else { - self.consume_string(b">>")?; - operation = BinaryOperator::ShiftRight; - } - self.consume_whitespace(false)?; - let right = self.consume_add_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - 
this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_add_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_mul_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"+") && !self.has_string(b"+=") - || self.has_string(b"-") && !self.has_string(b"-=") - { - let position = self.source.pos(); - let left = result; - let operation; - if self.has_string(b"+") { - self.consume_string(b"+")?; - operation = BinaryOperator::Add; - } else { - self.consume_string(b"-")?; - operation = BinaryOperator::Subtract; - } - self.consume_whitespace(false)?; - let right = self.consume_mul_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_mul_expression(&mut self, h: &mut Heap) -> Result { - let mut result = self.consume_prefix_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"*") && !self.has_string(b"*=") - || self.has_string(b"/") && !self.has_string(b"/=") - || self.has_string(b"%") && !self.has_string(b"%=") - { - let position = self.source.pos(); - let left = result; - let operation; - if self.has_string(b"*") { - self.consume_string(b"*")?; - operation = BinaryOperator::Multiply; - } else if self.has_string(b"/") { - self.consume_string(b"/")?; - operation = BinaryOperator::Divide; - } else { - self.consume_string(b"%")?; - operation = BinaryOperator::Remainder; - } - self.consume_whitespace(false)?; - let right = self.consume_prefix_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_binary_expression(|this| BinaryExpression { - this, - position, - left, - operation, - right, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - Ok(result) - } - fn consume_prefix_expression(&mut self, h: &mut Heap) -> Result { - if self.has_string(b"+") - || self.has_string(b"-") - || self.has_string(b"~") - || self.has_string(b"!") - { - let position = self.source.pos(); - let operation; - if self.has_string(b"+") { - self.consume_string(b"+")?; - if self.has_string(b"+") { - self.consume_string(b"+")?; - operation = UnaryOperation::PreIncrement; - } else { - operation = UnaryOperation::Positive; - } - } else if self.has_string(b"-") { - self.consume_string(b"-")?; - if self.has_string(b"-") { - self.consume_string(b"-")?; - operation = UnaryOperation::PreDecrement; - } else { - operation = UnaryOperation::Negative; - } - } else if self.has_string(b"~") { - self.consume_string(b"~")?; - operation = UnaryOperation::BitwiseNot; - } else { - self.consume_string(b"!")?; - operation = UnaryOperation::LogicalNot; - } - self.consume_whitespace(false)?; - if self.level >= MAX_LEVEL { - return Err(self.error_at_pos("Too deeply nested expression")); - } - self.level += 1; - let result = self.consume_prefix_expression(h); - self.level -= 1; - let expression = result?; - return Ok(h - .alloc_unary_expression(|this| UnaryExpression { - this, - position, - operation, - expression, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast()); - } - self.consume_postfix_expression(h) - } - fn consume_postfix_expression(&mut self, h: &mut Heap) -> Result { - let mut result = 
self.consume_primary_expression(h)?; - self.consume_whitespace(false)?; - while self.has_string(b"++") - || self.has_string(b"--") - || self.has_string(b"[") - || (self.has_string(b".") && !self.has_string(b"..")) - { - let mut position = self.source.pos(); - if self.has_string(b"++") { - self.consume_string(b"++")?; - let operation = UnaryOperation::PostIncrement; - let expression = result; - self.consume_whitespace(false)?; - result = h - .alloc_unary_expression(|this| UnaryExpression { - this, - position, - operation, - expression, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } else if self.has_string(b"--") { - self.consume_string(b"--")?; - let operation = UnaryOperation::PostDecrement; - let expression = result; - self.consume_whitespace(false)?; - result = h - .alloc_unary_expression(|this| UnaryExpression { - this, - position, - operation, - expression, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } else if self.has_string(b"[") { - self.consume_string(b"[")?; - self.consume_whitespace(false)?; - let subject = result; - let index = self.consume_expression(h)?; - self.consume_whitespace(false)?; - if self.has_string(b"..") || self.has_string(b":") { - position = self.source.pos(); - if self.has_string(b"..") { - self.consume_string(b"..")?; - } else { - self.consume_string(b":")?; - } - self.consume_whitespace(false)?; - let to_index = self.consume_expression(h)?; - self.consume_whitespace(false)?; - result = h - .alloc_slicing_expression(|this| SlicingExpression { - this, - position, - subject, - from_index: index, - to_index, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } else { - result = h - .alloc_indexing_expression(|this| IndexingExpression { - this, - position, - subject, - index, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - self.consume_string(b"]")?; - self.consume_whitespace(false)?; - } else { - assert!(self.has_string(b".")); - self.consume_string(b".")?; - self.consume_whitespace(false)?; - let subject = result; - let field; - if self.has_keyword(b"length") { - self.consume_keyword(b"length")?; - field = Field::Length; - } else { - let identifier = self.consume_identifier()?; - field = Field::Symbolic(FieldSymbolic{ - identifier, - definition: None, - field_idx: 0, - }); - } - result = h - .alloc_select_expression(|this| SelectExpression { - this, - position, - subject, - field, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - }) - .upcast(); - } - } - Ok(result) - } - fn consume_primary_expression(&mut self, h: &mut Heap) -> Result { - if self.has_string(b"(") { - return self.consume_paren_expression(h); - } - if self.has_string(b"{") { - return Ok(self.consume_array_expression(h)?.upcast()); - } - if self.has_builtin_literal() { - return Ok(self.consume_builtin_literal_expression(h)?.upcast()); - } - if self.has_struct_literal() { - return Ok(self.consume_struct_literal_expression(h)?.upcast()); - } - if self.has_call_expression() { - return Ok(self.consume_call_expression(h)?.upcast()); - } - if self.has_enum_literal() { - return Ok(self.consume_enum_literal(h)?.upcast()); - } - Ok(self.consume_variable_expression(h)?.upcast()) - } - fn consume_array_expression(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let mut elements = Vec::new(); - self.consume_string(b"{")?; - self.consume_whitespace(false)?; - if 
!self.has_string(b"}") { - while self.source.next().is_some() { - elements.push(self.consume_expression(h)?); - self.consume_whitespace(false)?; - if self.has_string(b"}") { - break; - } - self.consume_string(b",")?; - self.consume_whitespace(false)?; - } - } - self.consume_string(b"}")?; - Ok(h.alloc_array_expression(|this| ArrayExpression { - this, - position, - elements, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - })) - } - fn has_builtin_literal(&self) -> bool { - is_constant(self.source.next()) - || self.has_keyword(b"null") - || self.has_keyword(b"true") - || self.has_keyword(b"false") - } - fn consume_builtin_literal_expression( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - let value; - if self.has_keyword(b"null") { - self.consume_keyword(b"null")?; - value = Literal::Null; - } else if self.has_keyword(b"true") { - self.consume_keyword(b"true")?; - value = Literal::True; - } else if self.has_keyword(b"false") { - self.consume_keyword(b"false")?; - value = Literal::False; - } else if self.source.next() == Some(b'\'') { - self.source.consume(); - let mut data = Vec::new(); - let mut next = self.source.next(); - while next != Some(b'\'') && (is_vchar(next) || next == Some(b' ')) { - data.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - } - if next != Some(b'\'') || data.is_empty() { - return Err(self.error_at_pos("Expected character constant")); - } - self.source.consume(); - value = Literal::Character(data); - } else { - if !self.has_integer() { - return Err(self.error_at_pos("Expected integer constant")); - } - - value = Literal::Integer(self.consume_integer()?); - } - Ok(h.alloc_literal_expression(|this| LiteralExpression { - this, - position, - value, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - })) - } - fn has_enum_literal(&mut self) -> bool { - // An enum literal is always: - // maybe_a_namespace::EnumName::Variant - // So may for now be distinguished from other literals/variables by - // first checking for struct literals and call expressions, then for - // enum literals, finally for variable expressions. It is different - // from a variable expression in that it _always_ contains multiple - // elements to the enum. - let backup_pos = self.source.pos(); - let result = match self.consume_namespaced_identifier_spilled() { - Ok(num_namespaces) => num_namespaces > 1, - Err(_) => false, - }; - self.source.seek(backup_pos); - result - } - fn consume_enum_literal(&mut self, h: &mut Heap) -> Result { - let identifier = self.consume_namespaced_identifier(h)?; - Ok(h.alloc_literal_expression(|this| LiteralExpression{ - this, - position: identifier.position, - value: Literal::Enum(LiteralEnum{ - identifier, - poly_args2: Vec::new(), - definition: None, - variant_idx: 0, - }), - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - })) - } - fn has_struct_literal(&mut self) -> bool { - // A struct literal is written as: - // namespace::StructName{ field: expr } - // We will parse up until the opening brace to see if we're dealing with - // a struct literal. 
- let backup_pos = self.source.pos(); - let result = self.consume_namespaced_identifier_spilled().is_ok() && - self.consume_whitespace(false).is_ok() && - self.source.next() == Some(b'{'); - - self.source.seek(backup_pos); - return result; - } - - fn consume_struct_literal_expression(&mut self, h: &mut Heap) -> Result { - // Consume identifier and polymorphic arguments - debug_log!("consume_struct_literal_expression: {}", debug_line!(self.source)); - let position = self.source.pos(); - let identifier = self.consume_namespaced_identifier(h)?; - self.consume_whitespace(false)?; - - // Consume fields - let fields = match self.consume_comma_separated( - h, b'{', b'}', "Expected the end of the list of struct fields", - |lexer, heap| { - let identifier = lexer.consume_identifier()?; - lexer.consume_whitespace(false)?; - lexer.consume_string(b":")?; - lexer.consume_whitespace(false)?; - let value = lexer.consume_expression(heap)?; - - Ok(LiteralStructField{ identifier, value, field_idx: 0 }) - } - )? { - Some(fields) => fields, - None => return Err(ParseError::new_error( - self.source, self.source.pos(), - "A struct literal must be followed by its field values" - )) - }; - - Ok(h.alloc_literal_expression(|this| LiteralExpression{ - this, - position, - value: Literal::Struct(LiteralStruct{ - identifier, - fields, - poly_args2: Vec::new(), - definition: None, - }), - parent: ExpressionParent::None, - concrete_type: Default::default() - })) - } - - fn has_call_expression(&mut self) -> bool { - // We need to prevent ambiguity with various operators (because we may - // be specifying polymorphic variables) and variables. - if self.has_builtin_keyword() { - return true; - } - - let backup_pos = self.source.pos(); - let mut result = false; - - if self.consume_namespaced_identifier_spilled().is_ok() && - self.consume_whitespace(false).is_ok() && - self.source.next() == Some(b'(') { - // Seems like we have a function call or an enum literal - result = true; - } - - self.source.seek(backup_pos); - return result; - } - fn consume_call_expression(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - - // Consume method identifier - // TODO: @token Replace this conditional polymorphic arg parsing once we have a tokenizer. 
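`has_struct_literal` and `has_call_expression` above both speculate over a (namespaced) identifier, peek the next significant byte ('{' for a struct literal, '(' for a call), and then rewind. A hypothetical classifier distilling that lookahead decision; it only skips spaces, whereas the removed code also skipped comments:

#[derive(Debug, PartialEq)]
enum NameUse {
    StructLiteral, // Foo::Bar { .. }
    Call,          // foo(..)
    Other,         // plain variable, enum literal, etc., handled elsewhere
}

/// Given the input remaining after an identifier path has been speculatively
/// consumed, classify the name's use from the next non-whitespace byte.
/// The caller is expected to rewind afterwards, as the removed code does.
fn classify_name_use(rest: &[u8]) -> NameUse {
    match rest.iter().copied().find(|b| !b.is_ascii_whitespace()) {
        Some(b'{') => NameUse::StructLiteral,
        Some(b'(') => NameUse::Call,
        _ => NameUse::Other,
    }
}

fn main() {
    assert_eq!(classify_name_use(b"  { x: 5 }"), NameUse::StructLiteral);
    assert_eq!(classify_name_use(b"(1, 2)"), NameUse::Call);
    assert_eq!(classify_name_use(b" + 1"), NameUse::Other);
}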
- debug_log!("consume_call_expression: {}", debug_line!(self.source)); - let method; - let mut consume_poly_args_explicitly = true; - if self.has_keyword(b"get") { - self.consume_keyword(b"get")?; - method = Method::Get; - } else if self.has_keyword(b"put") { - self.consume_keyword(b"put")?; - method = Method::Put; - } else if self.has_keyword(b"fires") { - self.consume_keyword(b"fires")?; - method = Method::Fires; - } else if self.has_keyword(b"create") { - self.consume_keyword(b"create")?; - method = Method::Create; - } else { - let identifier = self.consume_namespaced_identifier(h)?; - method = Method::Symbolic(MethodSymbolic{ - identifier, - definition: None - }); - consume_poly_args_explicitly = false; - }; - - // Consume polymorphic arguments - let poly_args = if consume_poly_args_explicitly { - self.consume_whitespace(false)?; - self.consume_polymorphic_args(h, true)?.unwrap_or_default() - } else { - Vec::new() - }; - - // Consume arguments to call - self.consume_whitespace(false)?; - let mut arguments = Vec::new(); - self.consume_string(b"(")?; - self.consume_whitespace(false)?; - if !self.has_string(b")") { - // TODO: allow trailing comma - while self.source.next().is_some() { - arguments.push(self.consume_expression(h)?); - self.consume_whitespace(false)?; - if self.has_string(b")") { - break; - } - self.consume_string(b",")?; - self.consume_whitespace(false)? - } - } - self.consume_string(b")")?; - Ok(h.alloc_call_expression(|this| CallExpression { - this, - position, - method, - arguments, - poly_args, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - })) - } - fn consume_variable_expression( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - debug_log!("consume_variable_expression: {}", debug_line!(self.source)); - - // TODO: @token Reimplement when tokenizer is implemented, prevent ambiguities - let identifier = identifier_as_namespaced(self.consume_identifier()?); - - Ok(h.alloc_variable_expression(|this| VariableExpression { - this, - position, - identifier, - declaration: None, - parent: ExpressionParent::None, - concrete_type: ConcreteType::default(), - })) - } - - // ==================== - // Statements - // ==================== - - /// Consumes any kind of statement from the source and will error if it - /// did not encounter a statement. Will also return an error if the - /// statement is nested too deeply. - /// - /// `wrap_in_block` may be set to true to ensure that the parsed statement - /// will be wrapped in a block statement if it is not already a block - /// statement. This is used to ensure that all `if`, `while` and `sync` - /// statements have a block statement as body. - fn consume_statement(&mut self, h: &mut Heap, wrap_in_block: bool) -> Result { - if self.level >= MAX_LEVEL { - return Err(self.error_at_pos("Too deeply nested statement")); - } - self.level += 1; - let result = self.consume_statement_impl(h, wrap_in_block); - self.level -= 1; - result - } - fn has_label(&mut self) -> bool { - // To prevent ambiguity with expression statements consisting only of an - // identifier or a namespaced identifier, we look ahead and match on the - // *single* colon that signals a labeled statement. 
- let backup_pos = self.source.pos(); - let mut result = false; - if self.consume_identifier_spilled().is_ok() { - // next character is ':', second character is NOT ':' - result = Some(b':') == self.source.next() && Some(b':') != self.source.lookahead(1) - } - self.source.seek(backup_pos); - return result; - } - fn consume_statement_impl(&mut self, h: &mut Heap, wrap_in_block: bool) -> Result { - // Parse and allocate statement - let mut must_wrap = true; - let mut stmt_id = if self.has_string(b"{") { - must_wrap = false; - self.consume_block_statement(h)? - } else if self.has_keyword(b"skip") { - must_wrap = false; - self.consume_skip_statement(h)?.upcast() - } else if self.has_keyword(b"if") { - self.consume_if_statement(h)?.upcast() - } else if self.has_keyword(b"while") { - self.consume_while_statement(h)?.upcast() - } else if self.has_keyword(b"break") { - self.consume_break_statement(h)?.upcast() - } else if self.has_keyword(b"continue") { - self.consume_continue_statement(h)?.upcast() - } else if self.has_keyword(b"synchronous") { - self.consume_synchronous_statement(h)?.upcast() - } else if self.has_keyword(b"return") { - self.consume_return_statement(h)?.upcast() - } else if self.has_keyword(b"assert") { - self.consume_assert_statement(h)?.upcast() - } else if self.has_keyword(b"goto") { - self.consume_goto_statement(h)?.upcast() - } else if self.has_keyword(b"new") { - self.consume_new_statement(h)?.upcast() - } else if self.has_label() { - self.consume_labeled_statement(h)?.upcast() - } else { - self.consume_expression_statement(h)?.upcast() - }; - - // Wrap if desired and if needed - if must_wrap && wrap_in_block { - let position = h[stmt_id].position(); - let block_wrapper = h.alloc_block_statement(|this| BlockStatement{ - this, - position, - statements: vec![stmt_id], - parent_scope: None, - relative_pos_in_parent: 0, - locals: Vec::new(), - labels: Vec::new() - }); - - stmt_id = block_wrapper.upcast(); - } - - Ok(stmt_id) - } - fn has_local_statement(&mut self) -> bool { - /* To avoid ambiguity, we look ahead to find either the - channel keyword that signals a variable declaration, or - a type annotation followed by another identifier. - Example: - my_type[] x = {5}; // memory statement - my_var[5] = x; // assignment expression, expression statement - Note how both the local and the assignment - start with arbitrary identifier followed by [. */ - if self.has_keyword(b"channel") { - return true; - } - if self.has_statement_keyword() { - return false; - } - let backup_pos = self.source.pos(); - let mut result = false; - if self.maybe_consume_type_spilled_without_pos_recovery() { - // We seem to have a valid type, do we now have an identifier? 
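The `wrap_in_block` handling above guarantees that the bodies of `if`, `while` and `synchronous` statements are always block statements, wrapping a lone statement in a synthetic one-element block when needed (the removed code also exempts `skip`). A minimal sketch of that normalization with a hypothetical statement type:

#[derive(Debug, PartialEq)]
enum Stmt {
    Expr(String),
    Block(Vec<Stmt>),
    // other statement kinds elided
}

/// Ensure a statement is a block, wrapping it in a one-element block otherwise.
fn ensure_block(stmt: Stmt) -> Stmt {
    match stmt {
        block @ Stmt::Block(_) => block,
        other => Stmt::Block(vec![other]),
    }
}

fn main() {
    let body = ensure_block(Stmt::Expr("x = x + 1".to_string()));
    assert_eq!(body, Stmt::Block(vec![Stmt::Expr("x = x + 1".to_string())]));

    let already = ensure_block(Stmt::Block(vec![]));
    assert_eq!(already, Stmt::Block(vec![]));
}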
- if self.consume_whitespace(true).is_ok() { - result = self.has_identifier(); - } - } - - self.source.seek(backup_pos); - return result; - } - fn consume_block_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let mut statements = Vec::new(); - self.consume_string(b"{")?; - self.consume_whitespace(false)?; - while self.has_local_statement() { - let (local_id, stmt_id) = self.consume_local_statement(h)?; - statements.push(local_id.upcast()); - if let Some(stmt_id) = stmt_id { - statements.push(stmt_id.upcast()); - } - self.consume_whitespace(false)?; - } - while !self.has_string(b"}") { - statements.push(self.consume_statement(h, false)?); - self.consume_whitespace(false)?; - } - self.consume_string(b"}")?; - if statements.is_empty() { - Ok(h.alloc_skip_statement(|this| SkipStatement { this, position, next: None }).upcast()) - } else { - Ok(h.alloc_block_statement(|this| BlockStatement { - this, - position, - statements, - parent_scope: None, - relative_pos_in_parent: 0, - locals: Vec::new(), - labels: Vec::new(), - }) - .upcast()) - } - } - fn consume_local_statement(&mut self, h: &mut Heap) -> Result<(LocalStatementId, Option), ParseError> { - if self.has_keyword(b"channel") { - let local_id = self.consume_channel_statement(h)?.upcast(); - Ok((local_id, None)) - } else { - let (memory_id, stmt_id) = self.consume_memory_statement(h)?; - Ok((memory_id.upcast(), Some(stmt_id))) - } - } - fn consume_channel_statement( - &mut self, - h: &mut Heap, - ) -> Result { - // Consume channel statement and polymorphic argument if specified. - // Needs a tiny bit of special parsing to ensure the right amount of - // whitespace is present. - let position = self.source.pos(); - self.consume_keyword(b"channel")?; - - let expect_whitespace = self.source.next() != Some(b'<'); - self.consume_whitespace(expect_whitespace)?; - let poly_args = self.consume_polymorphic_args(h, true)?.unwrap_or_default(); - let poly_arg_id = match poly_args.len() { - 0 => h.alloc_parser_type(|this| ParserType{ - this, pos: position.clone(), variant: ParserTypeVariant::Inferred, - }), - 1 => poly_args[0], - _ => return Err(ParseError::new_error( - &self.source, self.source.pos(), - "port construction using 'channel' accepts up to 1 polymorphic argument" - )) - }; - self.consume_whitespace(false)?; - - // Consume the output port - let out_parser_type = h.alloc_parser_type(|this| ParserType{ - this, pos: position.clone(), variant: ParserTypeVariant::Output(poly_arg_id) - }); - let out_identifier = self.consume_identifier()?; - - // Consume the "->" syntax - self.consume_whitespace(false)?; - self.consume_string(b"->")?; - self.consume_whitespace(false)?; - - // Consume the input port - let in_parser_type = h.alloc_parser_type(|this| ParserType{ - this, pos: position.clone(), variant: ParserTypeVariant::Input(poly_arg_id) - }); - let in_identifier = self.consume_identifier()?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - let out_port = h.alloc_local(|this| Local { - this, - position, - parser_type: out_parser_type, - identifier: out_identifier, - relative_pos_in_block: 0 - }); - let in_port = h.alloc_local(|this| Local { - this, - position, - parser_type: in_parser_type, - identifier: in_identifier, - relative_pos_in_block: 0 - }); - Ok(h.alloc_channel_statement(|this| ChannelStatement { - this, - position, - from: out_port, - to: in_port, - relative_pos_in_block: 0, - next: None, - })) - } - fn consume_memory_statement(&mut self, h: &mut Heap) -> Result<(MemoryStatementId, 
ExpressionStatementId), ParseError> { - let position = self.source.pos(); - let parser_type = self.consume_type(h, true)?; - self.consume_whitespace(true)?; - let identifier = self.consume_identifier()?; - self.consume_whitespace(false)?; - let assignment_position = self.source.pos(); - self.consume_string(b"=")?; - self.consume_whitespace(false)?; - let initial = self.consume_expression(h)?; - let variable = h.alloc_local(|this| Local { - this, - position, - parser_type, - identifier: identifier.clone(), - relative_pos_in_block: 0 - }); - self.consume_whitespace(false)?; - self.consume_string(b";")?; - - // Transform into the variable declaration, followed by an assignment - let memory_stmt_id = h.alloc_memory_statement(|this| MemoryStatement { - this, - position, - variable, - next: None, - }); - let variable_expr_id = h.alloc_variable_expression(|this| VariableExpression{ - this, - position: identifier.position.clone(), - identifier: identifier_as_namespaced(identifier), - declaration: None, - parent: ExpressionParent::None, - concrete_type: Default::default() - }); - let assignment_expr_id = h.alloc_assignment_expression(|this| AssignmentExpression{ - this, - position: assignment_position, - left: variable_expr_id.upcast(), - operation: AssignmentOperator::Set, - right: initial, - parent: ExpressionParent::None, - concrete_type: Default::default() - }); - let assignment_stmt_id = h.alloc_expression_statement(|this| ExpressionStatement{ - this, - position, - expression: assignment_expr_id.upcast(), - next: None - }); - Ok((memory_stmt_id, assignment_stmt_id)) - } - fn consume_labeled_statement( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - let label = self.consume_identifier()?; - self.consume_whitespace(false)?; - self.consume_string(b":")?; - self.consume_whitespace(false)?; - let body = self.consume_statement(h, false)?; - Ok(h.alloc_labeled_statement(|this| LabeledStatement { - this, - position, - label, - body, - relative_pos_in_block: 0, - in_sync: None, - })) - } - fn consume_skip_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"skip")?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_skip_statement(|this| SkipStatement { this, position, next: None })) - } - fn consume_if_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"if")?; - self.consume_whitespace(false)?; - let test = self.consume_paren_expression(h)?; - self.consume_whitespace(false)?; - let true_body = self.consume_statement(h, true)?; - self.consume_whitespace(false)?; - let false_body = if self.has_keyword(b"else") { - self.consume_keyword(b"else")?; - self.consume_whitespace(false)?; - self.consume_statement(h, true)? 
- } else { - h.alloc_skip_statement(|this| SkipStatement { this, position, next: None }).upcast() - }; - Ok(h.alloc_if_statement(|this| IfStatement { this, position, test, true_body, false_body, end_if: None })) - } - fn consume_while_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"while")?; - self.consume_whitespace(false)?; - let test = self.consume_paren_expression(h)?; - self.consume_whitespace(false)?; - let body = self.consume_statement(h, true)?; - Ok(h.alloc_while_statement(|this| WhileStatement { - this, - position, - test, - body, - end_while: None, - in_sync: None, - })) - } - fn consume_break_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"break")?; - self.consume_whitespace(false)?; - let label; - if self.has_identifier() { - label = Some(self.consume_identifier()?); - self.consume_whitespace(false)?; - } else { - label = None; - } - self.consume_string(b";")?; - Ok(h.alloc_break_statement(|this| BreakStatement { this, position, label, target: None })) - } - fn consume_continue_statement( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"continue")?; - self.consume_whitespace(false)?; - let label; - if self.has_identifier() { - label = Some(self.consume_identifier()?); - self.consume_whitespace(false)?; - } else { - label = None; - } - self.consume_string(b";")?; - Ok(h.alloc_continue_statement(|this| ContinueStatement { - this, - position, - label, - target: None, - })) - } - fn consume_synchronous_statement( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"synchronous")?; - self.consume_whitespace(false)?; - // TODO: What was the purpose of this? Seems superfluous and confusing? 
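`consume_memory_statement` above desugars `T x = init;` into two statements: a plain declaration and a separate expression statement holding the assignment. A sketch of that split with hypothetical AST types (the real code allocates heap IDs instead of owning nodes):

#[derive(Debug, PartialEq)]
enum Expr {
    Var(String),
    Lit(i64),
    Assign { left: Box<Expr>, right: Box<Expr> },
}

#[derive(Debug, PartialEq)]
enum Stmt {
    Memory { ty: String, name: String }, // the "T x;" declaration part
    Expression(Expr),                    // the "x = init;" part
}

/// Split `T name = init;` into a declaration plus an assignment statement.
fn desugar_memory_stmt(ty: &str, name: &str, init: Expr) -> (Stmt, Stmt) {
    let decl = Stmt::Memory { ty: ty.to_string(), name: name.to_string() };
    let assign = Stmt::Expression(Expr::Assign {
        left: Box::new(Expr::Var(name.to_string())),
        right: Box::new(init),
    });
    (decl, assign)
}

fn main() {
    let (decl, assign) = desugar_memory_stmt("int", "x", Expr::Lit(5));
    assert_eq!(decl, Stmt::Memory { ty: "int".into(), name: "x".into() });
    assert_eq!(
        assign,
        Stmt::Expression(Expr::Assign {
            left: Box::new(Expr::Var("x".into())),
            right: Box::new(Expr::Lit(5)),
        })
    );
}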
- // let mut parameters = Vec::new(); - // if self.has_string(b"(") { - // self.consume_parameters(h, &mut parameters)?; - // self.consume_whitespace(false)?; - // } else if !self.has_keyword(b"skip") && !self.has_string(b"{") { - // return Err(self.error_at_pos("Expected block statement")); - // } - let body = self.consume_statement(h, true)?; - Ok(h.alloc_synchronous_statement(|this| SynchronousStatement { - this, - position, - body, - end_sync: None, - parent_scope: None, - })) - } - fn consume_return_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"return")?; - self.consume_whitespace(false)?; - let expression = if self.has_string(b"(") { - self.consume_paren_expression(h) - } else { - self.consume_expression(h) - }?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_return_statement(|this| ReturnStatement { this, position, expression })) - } - fn consume_assert_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"assert")?; - self.consume_whitespace(false)?; - let expression = if self.has_string(b"(") { - self.consume_paren_expression(h) - } else { - self.consume_expression(h) - }?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_assert_statement(|this| AssertStatement { - this, - position, - expression, - next: None, - })) - } - fn consume_goto_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"goto")?; - self.consume_whitespace(false)?; - let label = self.consume_identifier()?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_goto_statement(|this| GotoStatement { this, position, label, target: None })) - } - fn consume_new_statement(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - self.consume_keyword(b"new")?; - self.consume_whitespace(false)?; - let expression = self.consume_call_expression(h)?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_new_statement(|this| NewStatement { this, position, expression, next: None })) - } - fn consume_expression_statement( - &mut self, - h: &mut Heap, - ) -> Result { - let position = self.source.pos(); - let expression = self.consume_expression(h)?; - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(h.alloc_expression_statement(|this| ExpressionStatement { - this, - position, - expression, - next: None, - })) - } - - // ==================== - // Symbol definitions - // ==================== - - fn has_symbol_definition(&self) -> bool { - self.has_keyword(b"composite") - || self.has_keyword(b"primitive") - || self.has_type_keyword() - || self.has_identifier() - } - fn consume_symbol_definition(&mut self, h: &mut Heap) -> Result { - if self.has_keyword(b"struct") { - Ok(self.consume_struct_definition(h)?.upcast()) - } else if self.has_keyword(b"enum") { - Ok(self.consume_enum_definition(h)?.upcast()) - } else if self.has_keyword(b"union") { - Ok(self.consume_union_definition(h)?.upcast()) - } else if self.has_keyword(b"composite") || self.has_keyword(b"primitive") { - Ok(self.consume_component_definition(h)?.upcast()) - } else { - Ok(self.consume_function_definition(h)?.upcast()) - } - } - fn consume_struct_definition(&mut self, h: &mut Heap) -> Result { - // Parse "struct" keyword, optional polyvars and its identifier - let struct_pos = self.source.pos(); - self.consume_keyword(b"struct")?; - 
self.consume_whitespace(true)?; - let struct_ident = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - // Parse struct fields - let fields = match self.consume_comma_separated( - h, b'{', b'}', "Expected the end of the list of struct fields", - |lexer, heap| { - let position = lexer.source.pos(); - let parser_type = lexer.consume_type(heap, false)?; - lexer.consume_whitespace(true)?; - let field = lexer.consume_identifier()?; - - Ok(StructFieldDefinition{ position, field, parser_type }) - } - )? { - Some(fields) => fields, - None => return Err(ParseError::new_error( - self.source, struct_pos, - "An struct definition must be followed by its fields" - )), - }; - - // Valid struct definition - Ok(h.alloc_struct_definition(|this| StructDefinition{ - this, - position: struct_pos, - identifier: struct_ident, - poly_vars, - fields, - })) - } - fn consume_enum_definition(&mut self, h: &mut Heap) -> Result { - // Parse "enum" keyword, optional polyvars and its identifier - let enum_pos = self.source.pos(); - self.consume_keyword(b"enum")?; - self.consume_whitespace(true)?; - let enum_ident = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - let variants = match self.consume_comma_separated( - h, b'{', b'}', "Expected end of enum variant list", - |lexer, heap| { - // Variant identifier - let position = lexer.source.pos(); - let identifier = lexer.consume_identifier()?; - lexer.consume_whitespace(false)?; - - // Optional variant value/type - let next = lexer.source.next(); - let value = match next { - Some(b',') => { - // Do not consume, let `consume_comma_separated` handle - // the next item - EnumVariantValue::None - }, - Some(b'=') => { - // Integer value - lexer.source.consume(); - lexer.consume_whitespace(false)?; - if !lexer.has_integer() { - return Err(lexer.error_at_pos("expected integer")) - } - let value = lexer.consume_integer()?; - EnumVariantValue::Integer(value) - }, - Some(b'}') => { - // End of enum - EnumVariantValue::None - } - _ => { - return Err(lexer.error_at_pos("Expected ',', '}' or '='")); - } - }; - - Ok(EnumVariantDefinition{ position, identifier, value }) - } - )? 
{ - Some(variants) => variants, - None => return Err(ParseError::new_error( - self.source, enum_pos, - "An enum definition must be followed by its variants" - )), - }; - - Ok(h.alloc_enum_definition(|this| EnumDefinition{ - this, - position: enum_pos, - identifier: enum_ident, - poly_vars, - variants, - })) - } - fn consume_union_definition(&mut self, h: &mut Heap) -> Result { - // Parse "union" keyword, optional polyvars and the identifier - let union_pos = self.source.pos(); - self.consume_keyword(b"union")?; - self.consume_whitespace(true)?; - let union_ident = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - let variants = match self.consume_comma_separated( - h, b'{', b'}', "Expected end of union variant list", - |lexer, heap| { - // Variant identifier - let position = lexer.source.pos(); - let identifier = lexer.consume_identifier()?; - lexer.consume_whitespace(false)?; - - // Optional variant value - let next = lexer.source.next(); - let value = match next { - Some(b',') | Some(b'}') => { - // Continue parsing using `consume_comma_separated` - UnionVariantValue::None - }, - Some(b'(') => { - // Embedded type(s) - let embedded = lexer.consume_comma_separated( - heap, b'(', b')', "Expected end of embedded type list of union variant", - |lexer, heap| { - lexer.consume_type(heap, false) - } - )?.unwrap(); - - if embedded.is_empty() { - return Err(lexer.error_at_pos("Expected at least one embedded type")); - } - - UnionVariantValue::Embedded(embedded) - }, - _ => { - return Err(lexer.error_at_pos("Expected ',', '}' or '('")); - }, - }; - - Ok(UnionVariantDefinition{ position, identifier, value }) - } - )? { - Some(variants) => variants, - None => return Err(ParseError::new_error( - self.source, union_pos, - "A union definition must be followed by its variants" - )), - }; - - Ok(h.alloc_union_definition(|this| UnionDefinition{ - this, - position: union_pos, - identifier: union_ident, - poly_vars, - variants, - })) - } - fn consume_component_definition(&mut self, h: &mut Heap) -> Result { - // TODO: Cleanup - if self.has_keyword(b"composite") { - Ok(self.consume_composite_definition(h)?) - } else { - Ok(self.consume_primitive_definition(h)?) 
- } - } - fn consume_composite_definition(&mut self, h: &mut Heap) -> Result { - // Parse keyword, optional polyvars and the identifier - let position = self.source.pos(); - self.consume_keyword(b"composite")?; - self.consume_whitespace(true)?; - let identifier = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - // Consume parameters - let parameters = self.consume_parameters(h)?; - self.consume_whitespace(false)?; - - // Parse body - let body = self.consume_block_statement(h)?; - Ok(h.alloc_component(|this| ComponentDefinition { - this, - variant: ComponentVariant::Composite, - position, - identifier, - poly_vars, - parameters, - body - })) - } - fn consume_primitive_definition(&mut self, h: &mut Heap) -> Result { - // Consume keyword, optional polyvars and identifier - let position = self.source.pos(); - self.consume_keyword(b"primitive")?; - self.consume_whitespace(true)?; - let identifier = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - // Consume parameters - let parameters = self.consume_parameters(h)?; - self.consume_whitespace(false)?; - - // Consume body - let body = self.consume_block_statement(h)?; - Ok(h.alloc_component(|this| ComponentDefinition { - this, - variant: ComponentVariant::Primitive, - position, - identifier, - poly_vars, - parameters, - body - })) - } - fn consume_function_definition(&mut self, h: &mut Heap) -> Result { - // Consume return type, optional polyvars and identifier - let position = self.source.pos(); - let return_type = self.consume_type(h, false)?; - self.consume_whitespace(true)?; - let identifier = self.consume_identifier()?; - self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars(h)?; - self.consume_whitespace(false)?; - - // Consume parameters - let parameters = self.consume_parameters(h)?; - self.consume_whitespace(false)?; - - // Consume body - let body = self.consume_block_statement(h)?; - Ok(h.alloc_function(|this| FunctionDefinition { - this, - position, - return_type, - identifier, - poly_vars, - parameters, - body, - })) - } - fn has_pragma(&self) -> bool { - if let Some(c) = self.source.next() { - c == b'#' - } else { - false - } - } - fn consume_pragma(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let next = self.source.next(); - if next != Some(b'#') { - return Err(self.error_at_pos("Expected pragma")); - } - self.source.consume(); - if !is_vchar(self.source.next()) { - return Err(self.error_at_pos("Expected pragma")); - } - if self.has_string(b"version") { - self.consume_string(b"version")?; - self.consume_whitespace(true)?; - if !self.has_integer() { - return Err(self.error_at_pos("Expected integer constant")); - } - let version = self.consume_integer()?; - debug_assert!(version >= 0); - return Ok(h.alloc_pragma(|this| Pragma::Version(PragmaVersion{ - this, position, version: version as u64 - }))) - } else if self.has_string(b"module") { - self.consume_string(b"module")?; - self.consume_whitespace(true)?; - if !self.has_identifier() { - return Err(self.error_at_pos("Expected identifier")); - } - let mut value = Vec::new(); - let mut ident = self.consume_ident()?; - value.append(&mut ident); - while self.has_string(b".") { - self.consume_string(b".")?; - value.push(b'.'); - ident = self.consume_ident()?; - value.append(&mut ident); - } - return Ok(h.alloc_pragma(|this| 
Pragma::Module(PragmaModule{ - this, position, value - }))); - } else { - return Err(self.error_at_pos("Unknown pragma")); - } - } - - fn has_import(&self) -> bool { - self.has_keyword(b"import") - } - fn consume_import(&mut self, h: &mut Heap) -> Result { - // Parse the word "import" and the name of the module - let position = self.source.pos(); - self.consume_keyword(b"import")?; - self.consume_whitespace(true)?; - let mut value = Vec::new(); - let mut last_ident_pos = self.source.pos(); - let mut ident = self.consume_ident()?; - value.append(&mut ident); - let mut last_ident_start = 0; - - while self.has_string(b".") { - self.consume_string(b".")?; - value.push(b'.'); - last_ident_pos = self.source.pos(); - ident = self.consume_ident()?; - last_ident_start = value.len(); - value.append(&mut ident); - } - - - self.consume_whitespace(false)?; - - // Check for the potential aliasing or specific module imports - let import = if self.has_string(b"as") { - self.consume_string(b"as")?; - self.consume_whitespace(true)?; - let alias = self.consume_identifier()?; - - h.alloc_import(|this| Import::Module(ImportModule{ - this, - position, - module: value, - alias, - module_id: None, - })) - } else if self.has_string(b"::") { - self.consume_string(b"::")?; - self.consume_whitespace(false)?; - - let next = self.source.next(); - if Some(b'{') == next { - let symbols = match self.consume_comma_separated( - h, b'{', b'}', "Expected end of import list", - |lexer, _heap| { - // Symbol name - let position = lexer.source.pos(); - let name = lexer.consume_identifier()?; - lexer.consume_whitespace(false)?; - - // Symbol alias - if lexer.has_string(b"as") { - // With alias - lexer.consume_string(b"as")?; - lexer.consume_whitespace(true)?; - let alias = lexer.consume_identifier()?; - - Ok(AliasedSymbol{ - position, - name, - alias, - definition_id: None - }) - } else { - // Without alias - Ok(AliasedSymbol{ - position, - name: name.clone(), - alias: name, - definition_id: None - }) - } - } - )? { - Some(symbols) => symbols, - None => unreachable!(), // because we checked for opening '{' - }; - - h.alloc_import(|this| Import::Symbols(ImportSymbols{ - this, - position, - module: value, - module_id: None, - symbols, - })) - } else if Some(b'*') == next { - self.source.consume(); - h.alloc_import(|this| Import::Symbols(ImportSymbols{ - this, - position, - module: value, - module_id: None, - symbols: Vec::new() - })) - } else if self.has_identifier() { - let position = self.source.pos(); - let name = self.consume_identifier()?; - self.consume_whitespace(false)?; - let alias = if self.has_string(b"as") { - self.consume_string(b"as")?; - self.consume_whitespace(true)?; - self.consume_identifier()? 
- } else { - name.clone() - }; - - h.alloc_import(|this| Import::Symbols(ImportSymbols{ - this, - position, - module: value, - module_id: None, - symbols: vec![AliasedSymbol{ - position, - name, - alias, - definition_id: None - }] - })) - } else { - return Err(self.error_at_pos("Expected '*', '{' or a symbol name")); - } - } else { - // No explicit alias or subimports, so implicit alias - let alias_value = Vec::from(&value[last_ident_start..]); - h.alloc_import(|this| Import::Module(ImportModule{ - this, - position, - module: value, - alias: Identifier{ - position: last_ident_pos, - value: Vec::from(alias_value), - }, - module_id: None, - })) - }; - - self.consume_whitespace(false)?; - self.consume_string(b";")?; - Ok(import) - } - pub fn consume_protocol_description(&mut self, h: &mut Heap) -> Result { - let position = self.source.pos(); - let mut pragmas = Vec::new(); - let mut imports = Vec::new(); - let mut definitions = Vec::new(); - self.consume_whitespace(false)?; - while self.has_pragma() { - let pragma = self.consume_pragma(h)?; - pragmas.push(pragma); - self.consume_whitespace(false)?; - } - while self.has_import() { - let import = self.consume_import(h)?; - imports.push(import); - self.consume_whitespace(false)?; - } - while self.has_symbol_definition() { - let def = self.consume_symbol_definition(h)?; - definitions.push(def); - self.consume_whitespace(false)?; - } - // end of file - if !self.source.is_eof() { - return Err(self.error_at_pos("Expected end of file")); - } - Ok(h.alloc_protocol_description(|this| Root { - this, - position, - pragmas, - imports, - definitions, - })) - } -} diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index 95f635752057d7e6aade690a6d91755d5d978c48..4342e93024695cb32775328e32f2cb3d4989a399 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -1,29 +1,16 @@ mod arena; -// mod ast; mod eval; -pub(crate) mod inputsource; -pub(crate) mod input_source2; -// mod lexer; +pub(crate) mod input_source; mod parser; #[cfg(test)] mod tests; -// TODO: Remove when not benchmarking pub(crate) mod ast; pub(crate) mod ast_printer; -pub(crate) mod lexer; - -lazy_static::lazy_static! { - /// Conveniently-provided protocol description initialized with a zero-length PDL string. - /// Exposed to minimize repeated initializations of this common protocol description. - pub static ref TRIVIAL_PD: std::sync::Arc = { - std::sync::Arc::new(ProtocolDescription::parse(b"").unwrap()) - }; -} use crate::common::*; use crate::protocol::ast::*; use crate::protocol::eval::*; -use crate::protocol::inputsource::*; +use crate::protocol::input_source::*; use crate::protocol::parser::*; /// Description of a protocol object, used to configure new connectors. @@ -54,9 +41,9 @@ impl ProtocolDescription { pub fn parse(buffer: &[u8]) -> Result { // TODO: @fixme, keep code compilable, but needs support for multiple // input files. 
- let source = InputSource::from_buffer(buffer).unwrap(); + let source = InputSource::new(String::new(), Vec::from(buffer)); let mut parser = Parser::new(); - parser.feed(source).expect("failed to lex source"); + parser.feed(source).expect("failed to feed source"); if let Err(err) = parser.parse() { println!("ERROR:\n{}", err); @@ -64,8 +51,10 @@ impl ProtocolDescription { } debug_assert_eq!(parser.modules.len(), 1, "only supporting one module here for now"); - let root = parser.modules[0].root_id; - return Ok(ProtocolDescription { heap: parser.heap, source: parser.modules[0].source.clone(), root }); + let module = parser.modules.remove(0); + let root = module.root_id; + let source = module.source; + return Ok(ProtocolDescription { heap: parser.heap, source, root }); } pub(crate) fn component_polarities( &self, @@ -84,10 +73,10 @@ impl ProtocolDescription { } for &param in def.parameters().iter() { let param = &h[param]; - let parser_type = &h[param.parser_type]; + let first_element = &param.parser_type.elements[0]; - match parser_type.variant { - ParserTypeVariant::Input(_) | ParserTypeVariant::Output(_) => continue, + match first_element.variant { + ParserTypeVariant::Input | ParserTypeVariant::Output => continue, _ => { return Err(NonPortTypeParameters); } @@ -96,11 +85,11 @@ impl ProtocolDescription { let mut result = Vec::new(); for &param in def.parameters().iter() { let param = &h[param]; - let parser_type = &h[param.parser_type]; + let first_element = &param.parser_type.elements[0]; - if let ParserTypeVariant::Input(_) = parser_type.variant { + if first_element.variant == ParserTypeVariant::Input { result.push(Polarity::Getter) - } else if let ParserTypeVariant::Output(_) = parser_type.variant { + if first_element.variant == ParserTypeVariant::Output { result.push(Polarity::Putter) } else { unreachable!() diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index a3d1f7823e2879160495ca3b31cf569db85b8e33..fc3ef06d03bc0b4d36a52a78aadcac5724b5d4e5 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -1,5 +1,5 @@ use crate::protocol::ast::*; -use crate::protocol::inputsource::*; +use crate::protocol::input_source::*; // The following indirection is needed due to a bug in the cbindgen tool.
type Unit = (); @@ -20,13 +20,13 @@ pub(crate) trait Visitor: Sized { fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { recursive_symbol_definition(self, h, def) } - fn visit_struct_definition(&mut self, _h: &mut Heap, _def: StructId) -> VisitorResult { + fn visit_struct_definition(&mut self, _h: &mut Heap, _def: StructDefinitionId) -> VisitorResult { Ok(()) } - fn visit_enum_definition(&mut self, _h: &mut Heap, _def: EnumId) -> VisitorResult { + fn visit_enum_definition(&mut self, _h: &mut Heap, _def: EnumDefinitionId) -> VisitorResult { Ok(()) } - fn visit_union_definition(&mut self, _h: &mut Heap, _def: UnionId) -> VisitorResult { + fn visit_union_definition(&mut self, _h: &mut Heap, _def: UnionDefinitionId) -> VisitorResult { Ok(()) } fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { @@ -38,7 +38,7 @@ pub(crate) trait Visitor: Sized { fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { recursive_primitive_definition(self, h, def) } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { + fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionDefinitionId) -> VisitorResult { recursive_function_definition(self, h, def) } @@ -74,9 +74,6 @@ pub(crate) trait Visitor: Sized { fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { recursive_labeled_statement(self, h, stmt) } - fn visit_skip_statement(&mut self, _h: &mut Heap, _stmt: SkipStatementId) -> VisitorResult { - Ok(()) - } fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { recursive_if_statement(self, h, stmt) } @@ -112,15 +109,12 @@ pub(crate) trait Visitor: Sized { fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { recursive_return_statement(self, h, stmt) } - fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - recursive_assert_statement(self, h, stmt) + fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { + recursive_new_statement(self, h, stmt) } fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { Ok(()) } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - recursive_new_statement(self, h, stmt) - } fn visit_expression_statement( &mut self, h: &mut Heap, @@ -176,9 +170,6 @@ pub(crate) trait Visitor: Sized { fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { recursive_select_expression(self, h, expr) } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - recursive_array_expression(self, h, expr) - } fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { recursive_call_expression(self, h, expr) } @@ -294,12 +285,12 @@ fn recursive_primitive_definition( fn recursive_function_definition( this: &mut T, h: &mut Heap, - def: FunctionId, + def: FunctionDefinitionId, ) -> VisitorResult { for ¶m in h[def].parameters.clone().iter() { recursive_parameter_as_variable(this, h, param)?; } - this.visit_statement(h, h[def].body) + this.visit_block_statement(h, h[def].body) } fn recursive_variable_declaration( @@ -317,7 +308,6 @@ fn recursive_statement(this: &mut T, h: &mut Heap, stmt: StatementId match h[stmt].clone() { Statement::Block(stmt) => 
this.visit_block_statement(h, stmt.this), Statement::Local(stmt) => this.visit_local_statement(h, stmt.this()), - Statement::Skip(stmt) => this.visit_skip_statement(h, stmt.this), Statement::Labeled(stmt) => this.visit_labeled_statement(h, stmt.this), Statement::If(stmt) => this.visit_if_statement(h, stmt.this), Statement::While(stmt) => this.visit_while_statement(h, stmt.this), @@ -325,7 +315,6 @@ fn recursive_statement(this: &mut T, h: &mut Heap, stmt: StatementId Statement::Continue(stmt) => this.visit_continue_statement(h, stmt.this), Statement::Synchronous(stmt) => this.visit_synchronous_statement(h, stmt.this), Statement::Return(stmt) => this.visit_return_statement(h, stmt.this), - Statement::Assert(stmt) => this.visit_assert_statement(h, stmt.this), Statement::Goto(stmt) => this.visit_goto_statement(h, stmt.this), Statement::New(stmt) => this.visit_new_statement(h, stmt.this), Statement::Expression(stmt) => this.visit_expression_statement(h, stmt.this), @@ -407,15 +396,8 @@ fn recursive_return_statement( h: &mut Heap, stmt: ReturnStatementId, ) -> VisitorResult { - this.visit_expression(h, h[stmt].expression) -} - -fn recursive_assert_statement( - this: &mut T, - h: &mut Heap, - stmt: AssertStatementId, -) -> VisitorResult { - this.visit_expression(h, h[stmt].expression) + debug_assert_eq!(h[stmt].expressions.len(), 1); + this.visit_expression(h, h[stmt].expressions[0]) } fn recursive_new_statement( @@ -448,7 +430,6 @@ fn recursive_expression( Expression::Indexing(expr) => this.visit_indexing_expression(h, expr.this), Expression::Slicing(expr) => this.visit_slicing_expression(h, expr.this), Expression::Select(expr) => this.visit_select_expression(h, expr.this), - Expression::Array(expr) => this.visit_array_expression(h, expr.this), Expression::Literal(expr) => this.visit_constant_expression(h, expr.this), Expression::Call(expr) => this.visit_call_expression(h, expr.this), Expression::Variable(expr) => this.visit_variable_expression(h, expr.this), @@ -527,17 +508,6 @@ fn recursive_select_expression( this.visit_expression(h, h[expr].subject) } -fn recursive_array_expression( - this: &mut T, - h: &mut Heap, - expr: ArrayExpressionId, -) -> VisitorResult { - for &expr in h[expr].elements.clone().iter() { - this.visit_expression(h, expr)?; - } - Ok(()) -} - fn recursive_call_expression( this: &mut T, h: &mut Heap, @@ -553,287 +523,6 @@ fn recursive_call_expression( // Grammar Rules // ==================== -pub(crate) struct NestedSynchronousStatements { - illegal: bool, -} - -impl NestedSynchronousStatements { - pub(crate) fn new() -> Self { - NestedSynchronousStatements { illegal: false } - } -} - -impl Visitor for NestedSynchronousStatements { - fn visit_composite_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_composite_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionDefinitionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_function_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - if self.illegal { - return Err(( - h[stmt].position(), - "Illegal nested synchronous statement".to_string(), - )); - } - self.illegal = true; - recursive_synchronous_statement(self, h, stmt)?; - self.illegal = false; - Ok(()) - } - fn visit_expression(&mut self, 
_h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -pub(crate) struct ChannelStatementOccurrences { - illegal: bool, -} - -impl ChannelStatementOccurrences { - pub(crate) fn new() -> Self { - ChannelStatementOccurrences { illegal: false } - } -} - -impl Visitor for ChannelStatementOccurrences { - fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_primitive_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - assert!(!self.illegal); - self.illegal = true; - recursive_function_definition(self, h, def)?; - self.illegal = false; - Ok(()) - } - fn visit_channel_statement(&mut self, h: &mut Heap, stmt: ChannelStatementId) -> VisitorResult { - if self.illegal { - return Err((h[stmt].position(), "Illegal channel declaration".to_string())); - } - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -pub(crate) struct FunctionStatementReturns {} - -impl FunctionStatementReturns { - pub(crate) fn new() -> Self { - FunctionStatementReturns {} - } - fn function_error(&self, position: InputPosition) -> VisitorResult { - Err((position, "Function definition must return".to_string())) - } -} - -impl Visitor for FunctionStatementReturns { - fn visit_component_definition(&mut self, _h: &mut Heap, _def: ComponentDefinitionId) -> VisitorResult { - Ok(()) - } - fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, block: BlockStatementId) -> VisitorResult { - let len = h[block].statements.len(); - assert!(len > 0); - self.visit_statement(h, h[block].statements[len - 1]) - } - fn visit_skip_statement(&mut self, h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_continue_statement( - &mut self, - h: &mut Heap, - stmt: ContinueStatementId, - ) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_assert_statement(&mut self, h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_expression_statement( - &mut self, - h: &mut Heap, - stmt: ExpressionStatementId, - ) -> VisitorResult { - self.function_error(h[stmt].position) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -pub(crate) struct ComponentStatementReturnNew { - illegal_new: bool, - illegal_return: bool, -} - -impl ComponentStatementReturnNew { - pub(crate) fn new() -> Self { - ComponentStatementReturnNew { illegal_new: false, illegal_return: false } - } -} - -impl Visitor for ComponentStatementReturnNew { - fn visit_component_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { - assert!(!(self.illegal_new || self.illegal_return)); - self.illegal_return = true; - recursive_component_definition(self, h, def)?; - self.illegal_return = false; - Ok(()) - } - fn visit_primitive_definition(&mut self, h: &mut Heap, def: ComponentDefinitionId) -> VisitorResult { - 
assert!(!self.illegal_new); - self.illegal_new = true; - recursive_primitive_definition(self, h, def)?; - self.illegal_new = false; - Ok(()) - } - fn visit_function_definition(&mut self, h: &mut Heap, def: FunctionId) -> VisitorResult { - assert!(!(self.illegal_new || self.illegal_return)); - self.illegal_new = true; - recursive_function_definition(self, h, def)?; - self.illegal_new = false; - Ok(()) - } - fn visit_variable_declaration(&mut self, _h: &mut Heap, _decl: VariableId) -> VisitorResult { - Ok(()) - } - fn visit_return_statement(&mut self, h: &mut Heap, stmt: ReturnStatementId) -> VisitorResult { - if self.illegal_return { - Err((h[stmt].position, "Component definition must not return".to_string())) - } else { - recursive_return_statement(self, h, stmt) - } - } - fn visit_new_statement(&mut self, h: &mut Heap, stmt: NewStatementId) -> VisitorResult { - if self.illegal_new { - Err(( - h[stmt].position, - "Symbol definition contains illegal new statement".to_string(), - )) - } else { - recursive_new_statement(self, h, stmt) - } - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -pub(crate) struct CheckBuiltinOccurrences { - legal: bool, -} - -impl CheckBuiltinOccurrences { - pub(crate) fn new() -> Self { - CheckBuiltinOccurrences { legal: false } - } -} - -impl Visitor for CheckBuiltinOccurrences { - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(!self.legal); - self.legal = true; - recursive_synchronous_statement(self, h, stmt)?; - self.legal = false; - Ok(()) - } - fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { - match h[expr].method { - Method::Get | Method::Fires => { - if !self.legal { - return Err((h[expr].position, "Illegal built-in occurrence".to_string())); - } - } - _ => {} - } - recursive_call_expression(self, h, expr) - } -} - -pub(crate) struct BuildScope { - scope: Option, -} - -impl BuildScope { - pub(crate) fn new() -> Self { - BuildScope { scope: None } - } -} - -impl Visitor for BuildScope { - fn visit_symbol_definition(&mut self, h: &mut Heap, def: DefinitionId) -> VisitorResult { - assert!(self.scope.is_none()); - self.scope = Some(Scope::Definition(def)); - recursive_symbol_definition(self, h, def)?; - self.scope = None; - Ok(()) - } - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - // First store the current scope - h[stmt].parent_scope = self.scope; - // Then move scope down to current block - self.scope = Some(Scope::Regular(stmt)); - recursive_block_statement(self, h, stmt)?; - // Move scope back up - self.scope = old; - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(!self.scope.is_none()); - let old = self.scope; - // First store the current scope - h[stmt].parent_scope = self.scope; - // Then move scope down to current sync - // TODO: Should be legal-ish, but very wrong - self.scope = Some(Scope::Synchronous((stmt, BlockStatementId(stmt.upcast())))); - recursive_synchronous_statement(self, h, stmt)?; - // Move scope back up - self.scope = old; - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - pub(crate) struct UniqueStatementId(StatementId); pub(crate) struct LinkStatements { @@ -867,10 +556,6 @@ impl Visitor for 
LinkStatements { fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { recursive_labeled_statement(self, h, stmt) } - fn visit_skip_statement(&mut self, _h: &mut Heap, stmt: SkipStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } fn visit_if_statement(&mut self, h: &mut Heap, stmt: IfStatementId) -> VisitorResult { // Link the two branches to the corresponding EndIf pseudo-statement let end_if_id = h[stmt].end_if; @@ -963,10 +648,6 @@ impl Visitor for LinkStatements { fn visit_return_statement(&mut self, _h: &mut Heap, _stmt: ReturnStatementId) -> VisitorResult { Ok(()) } - fn visit_assert_statement(&mut self, _h: &mut Heap, stmt: AssertStatementId) -> VisitorResult { - self.prev = Some(UniqueStatementId(stmt.upcast())); - Ok(()) - } fn visit_goto_statement(&mut self, _h: &mut Heap, _stmt: GotoStatementId) -> VisitorResult { Ok(()) } @@ -987,237 +668,6 @@ impl Visitor for LinkStatements { } } -pub(crate) struct BuildLabels { - block: Option, - sync_enclosure: Option, -} - -impl BuildLabels { - pub(crate) fn new() -> Self { - BuildLabels { block: None, sync_enclosure: None } - } -} - -impl Visitor for BuildLabels { - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert_eq!(self.block, h[stmt].parent_block(h)); - let old = self.block; - self.block = Some(stmt); - recursive_block_statement(self, h, stmt)?; - self.block = old; - Ok(()) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - assert!(!self.block.is_none()); - // Store label in current block (on the fly) - h[self.block.unwrap()].labels.push(stmt); - // Update synchronous scope of label - h[stmt].in_sync = self.sync_enclosure; - recursive_labeled_statement(self, h, stmt) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - h[stmt].in_sync = self.sync_enclosure; - recursive_while_statement(self, h, stmt) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(self.sync_enclosure.is_none()); - self.sync_enclosure = Some(stmt); - recursive_synchronous_statement(self, h, stmt)?; - self.sync_enclosure = None; - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - -pub(crate) struct ResolveLabels { - block: Option, - while_enclosure: Option, - sync_enclosure: Option, -} - -impl ResolveLabels { - pub(crate) fn new() -> Self { - ResolveLabels { block: None, while_enclosure: None, sync_enclosure: None } - } - fn check_duplicate_impl( - h: &Heap, - block: Option, - stmt: LabeledStatementId, - ) -> VisitorResult { - if let Some(block) = block { - // Checking the parent first is important. Otherwise, labels - // overshadow previously defined labels: and this is illegal! - ResolveLabels::check_duplicate_impl(h, h[block].parent_block(h), stmt)?; - // For the current block, check for a duplicate. 
- for &other_stmt in h[block].labels.iter() { - if other_stmt == stmt { - continue; - } else { - if h[other_stmt].label == h[stmt].label { - return Err((h[stmt].position, "Duplicate label".to_string())); - } - } - } - } - Ok(()) - } - fn check_duplicate(&self, h: &Heap, stmt: LabeledStatementId) -> VisitorResult { - ResolveLabels::check_duplicate_impl(h, self.block, stmt) - } - fn get_target( - &self, - h: &Heap, - id: &Identifier, - ) -> Result { - if let Some(stmt) = ResolveLabels::find_target(h, self.block, id) { - Ok(stmt) - } else { - Err((id.position, "Unresolved label".to_string())) - } - } - fn find_target( - h: &Heap, - block: Option, - id: &Identifier, - ) -> Option { - if let Some(block) = block { - // It does not matter in what order we find the labels. - // If there are duplicates: that is checked elsewhere. - for &stmt in h[block].labels.iter() { - if h[stmt].label == *id { - return Some(stmt); - } - } - if let Some(stmt) = ResolveLabels::find_target(h, h[block].parent_block(h), id) { - return Some(stmt); - } - } - None - } -} - -impl Visitor for ResolveLabels { - fn visit_block_statement(&mut self, h: &mut Heap, stmt: BlockStatementId) -> VisitorResult { - assert_eq!(self.block, h[stmt].parent_block(h)); - let old = self.block; - self.block = Some(stmt); - recursive_block_statement(self, h, stmt)?; - self.block = old; - Ok(()) - } - fn visit_labeled_statement(&mut self, h: &mut Heap, stmt: LabeledStatementId) -> VisitorResult { - assert!(!self.block.is_none()); - self.check_duplicate(h, stmt)?; - recursive_labeled_statement(self, h, stmt) - } - fn visit_while_statement(&mut self, h: &mut Heap, stmt: WhileStatementId) -> VisitorResult { - let old = self.while_enclosure; - self.while_enclosure = Some(stmt); - recursive_while_statement(self, h, stmt)?; - self.while_enclosure = old; - Ok(()) - } - fn visit_break_statement(&mut self, h: &mut Heap, stmt: BreakStatementId) -> VisitorResult { - let the_while; - if let Some(label) = &h[stmt].label { - let target = self.get_target(h, label)?; - let target = &h[h[target].body]; - if !target.is_while() { - return Err(( - h[stmt].position, - "Illegal break: target not a while statement".to_string(), - )); - } - the_while = target.as_while(); - // TODO: check if break is nested under while - } else { - if self.while_enclosure.is_none() { - return Err(( - h[stmt].position, - "Illegal break: no surrounding while statement".to_string(), - )); - } - the_while = &h[self.while_enclosure.unwrap()]; - // break is always nested under while, by recursive vistor - } - if the_while.in_sync != self.sync_enclosure { - return Err(( - h[stmt].position, - "Illegal break: synchronous statement escape".to_string(), - )); - } - h[stmt].target = the_while.end_while; - Ok(()) - } - fn visit_continue_statement( - &mut self, - h: &mut Heap, - stmt: ContinueStatementId, - ) -> VisitorResult { - let the_while; - if let Some(label) = &h[stmt].label { - let target = self.get_target(h, label)?; - let target = &h[h[target].body]; - if !target.is_while() { - return Err(( - h[stmt].position, - "Illegal continue: target not a while statement".to_string(), - )); - } - the_while = target.as_while(); - // TODO: check if continue is nested under while - } else { - if self.while_enclosure.is_none() { - return Err(( - h[stmt].position, - "Illegal continue: no surrounding while statement".to_string(), - )); - } - the_while = &h[self.while_enclosure.unwrap()]; - // continue is always nested under while, by recursive vistor - } - if the_while.in_sync != self.sync_enclosure { - 
return Err(( - h[stmt].position, - "Illegal continue: synchronous statement escape".to_string(), - )); - } - h[stmt].target = Some(the_while.this); - Ok(()) - } - fn visit_synchronous_statement( - &mut self, - h: &mut Heap, - stmt: SynchronousStatementId, - ) -> VisitorResult { - assert!(self.sync_enclosure.is_none()); - self.sync_enclosure = Some(stmt); - recursive_synchronous_statement(self, h, stmt)?; - self.sync_enclosure = None; - Ok(()) - } - fn visit_goto_statement(&mut self, h: &mut Heap, stmt: GotoStatementId) -> VisitorResult { - let target = self.get_target(h, &h[stmt].label)?; - if h[target].in_sync != self.sync_enclosure { - return Err(( - h[stmt].position, - "Illegal goto: synchronous statement escape".to_string(), - )); - } - h[stmt].target = Some(target); - Ok(()) - } - fn visit_expression(&mut self, _h: &mut Heap, _expr: ExpressionId) -> VisitorResult { - Ok(()) - } -} - pub(crate) struct AssignableExpressions { assignable: bool, } @@ -1238,7 +688,7 @@ impl Visitor for AssignableExpressions { expr: AssignmentExpressionId, ) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { self.assignable = true; self.visit_expression(h, h[expr].left)?; @@ -1252,21 +702,21 @@ impl Visitor for AssignableExpressions { expr: ConditionalExpressionId, ) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_conditional_expression(self, h, expr) } } fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_binary_expression(self, h, expr) } } fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { match h[expr].operation { UnaryOperation::PostDecrement @@ -1306,7 +756,7 @@ impl Visitor for AssignableExpressions { } fn visit_select_expression(&mut self, h: &mut Heap, expr: SelectExpressionId) -> VisitorResult { if h[expr].field.is_length() && self.assignable { - return self.error(h[expr].position); + return self.error(h[expr].span.begin); } let old = self.assignable; self.assignable = false; @@ -1314,16 +764,9 @@ impl Visitor for AssignableExpressions { self.assignable = old; Ok(()) } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - if self.assignable { - self.error(h[expr].position) - } else { - recursive_array_expression(self, h, expr) - } - } fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_call_expression(self, h, expr) } @@ -1334,7 +777,7 @@ impl Visitor for AssignableExpressions { expr: LiteralExpressionId, ) -> VisitorResult { if self.assignable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { Ok(()) } @@ -1368,7 +811,7 @@ impl Visitor for IndexableExpressions { expr: AssignmentExpressionId, ) -> VisitorResult { if self.indexable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_assignment_expression(self, h, expr) } @@ -1387,14 +830,14 @@ impl Visitor for IndexableExpressions { } fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { if self.indexable && h[expr].operation != 
BinaryOperator::Concatenate { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_binary_expression(self, h, expr) } } fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { if self.indexable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_unary_expression(self, h, expr) } @@ -1433,13 +876,6 @@ impl Visitor for IndexableExpressions { self.indexable = old; Ok(()) } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - let old = self.indexable; - self.indexable = false; - recursive_array_expression(self, h, expr)?; - self.indexable = old; - Ok(()) - } fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { let old = self.indexable; self.indexable = false; @@ -1453,7 +889,7 @@ impl Visitor for IndexableExpressions { expr: LiteralExpressionId, ) -> VisitorResult { if self.indexable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { Ok(()) } @@ -1500,14 +936,14 @@ impl Visitor for SelectableExpressions { } fn visit_binary_expression(&mut self, h: &mut Heap, expr: BinaryExpressionId) -> VisitorResult { if self.selectable && h[expr].operation != BinaryOperator::Concatenate { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_binary_expression(self, h, expr) } } fn visit_unary_expression(&mut self, h: &mut Heap, expr: UnaryExpressionId) -> VisitorResult { if self.selectable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { recursive_unary_expression(self, h, expr) } @@ -1541,13 +977,6 @@ impl Visitor for SelectableExpressions { self.selectable = old; Ok(()) } - fn visit_array_expression(&mut self, h: &mut Heap, expr: ArrayExpressionId) -> VisitorResult { - let old = self.selectable; - self.selectable = false; - recursive_array_expression(self, h, expr)?; - self.selectable = old; - Ok(()) - } fn visit_call_expression(&mut self, h: &mut Heap, expr: CallExpressionId) -> VisitorResult { let old = self.selectable; self.selectable = false; @@ -1561,7 +990,7 @@ impl Visitor for SelectableExpressions { expr: LiteralExpressionId, ) -> VisitorResult { if self.selectable { - self.error(h[expr].position) + self.error(h[expr].span.begin) } else { Ok(()) } diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index d4eb2b451287cfb26ab0e15ce4670342dde36c9c..3019bb25b7f8ac5560829531525e454724b2ca41 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,6 +1,5 @@ mod depth_visitor; pub(crate) mod symbol_table; -pub(crate) mod symbol_table2; pub(crate) mod type_table; pub(crate) mod tokens; pub(crate) mod token_parsing; @@ -8,48 +7,49 @@ pub(crate) mod pass_tokenizer; pub(crate) mod pass_symbols; pub(crate) mod pass_imports; pub(crate) mod pass_definitions; -mod type_resolver; +pub(crate) mod pass_validation_linking; +pub(crate) mod pass_typing; mod visitor; -mod pass_validation_linking; -mod utils; use depth_visitor::*; use tokens::*; use crate::collections::*; -use symbol_table2::SymbolTable; +use symbol_table::SymbolTable; use visitor::Visitor2; +use pass_tokenizer::PassTokenizer; +use pass_symbols::PassSymbols; +use pass_imports::PassImport; +use pass_definitions::PassDefinitions; use pass_validation_linking::PassValidationLinking; -use type_resolver::{TypeResolvingVisitor, ResolveQueue}; -use type_table::{TypeTable, TypeCtx}; +use pass_typing::{PassTyping, ResolveQueue}; +use type_table::TypeTable; use 
crate::protocol::ast::*; -use crate::protocol::input_source2::{InputSource2 as InputSource}; -use crate::protocol::lexer::*; +use crate::protocol::input_source::*; -use std::collections::HashMap; use crate::protocol::ast_printer::ASTWriter; -#[derive(PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ModuleCompilationPhase { Source, // only source is set Tokenized, // source is tokenized SymbolsScanned, // all definitions are linked to their type class ImportsResolved, // all imports are added to the symbol table DefinitionsParsed, // produced the AST for the entire module - TypesParsed, // added all definitions to the type table + TypesAddedToTable, // added all definitions to the type table ValidatedAndLinked, // AST is traversed and has linked the required AST nodes Typed, // Type inference and checking has been performed } pub struct Module { // Buffers - source: InputSource, - tokens: TokenBuffer, + pub source: InputSource, + pub tokens: TokenBuffer, // Identifiers - root_id: RootId, - name: Option<(PragmaId, StringRef<'static>)>, - version: Option<(PragmaId, i64)>, - phase: ModuleCompilationPhase, + pub root_id: RootId, + pub name: Option<(PragmaId, StringRef<'static>)>, + pub version: Option<(PragmaId, i64)>, + pub phase: ModuleCompilationPhase, } pub struct PassCtx<'a> { @@ -58,190 +58,91 @@ pub struct PassCtx<'a> { pool: &'a mut StringPool, } -// TODO: @fixme, pub qualifier -pub(crate) struct LexedModule { - pub(crate) source: InputSource, - module_name: Vec, - version: Option, - pub(crate) root_id: RootId, -} - pub struct Parser { pub(crate) heap: Heap, - pub(crate) modules: Vec, - pub(crate) module_lookup: HashMap, usize>, // from (optional) module name to `modules` idx + pub(crate) string_pool: StringPool, + pub(crate) modules: Vec, pub(crate) symbol_table: SymbolTable, pub(crate) type_table: TypeTable, + // Compiler passes + pass_tokenizer: PassTokenizer, + pass_symbols: PassSymbols, + pass_import: PassImport, + pass_definitions: PassDefinitions, + pass_validation: PassValidationLinking, + pass_typing: PassTyping, } impl Parser { pub fn new() -> Self { Parser{ heap: Heap::new(), + string_pool: StringPool::new(), modules: Vec::new(), - module_lookup: HashMap::new(), symbol_table: SymbolTable::new(), type_table: TypeTable::new(), + pass_tokenizer: PassTokenizer::new(), + pass_symbols: PassSymbols::new(), + pass_import: PassImport::new(), + pass_definitions: PassDefinitions::new(), + pass_validation: PassValidationLinking::new(), + pass_typing: PassTyping::new(), } } - pub fn feed(&mut self, mut source: InputSource) -> Result { - // Lex the input source - let mut lex = Lexer::new(&mut source); - let pd = lex.consume_protocol_description(&mut self.heap)?; - - // Seek the module name and version - let root = &self.heap[pd]; - let mut module_name_pos = InputPosition::default(); - let mut module_name = Vec::new(); - let mut module_version_pos = InputPosition::default(); - let mut module_version = None; - - for pragma in &root.pragmas { - match &self.heap[*pragma] { - Pragma::Module(module) => { - if !module_name.is_empty() { - return Err( - ParseError::new_error(&source, module.position, "Double definition of module name in the same file") - .with_postfixed_info(&source, module_name_pos, "Previous definition was here") - ) - } - - module_name_pos = module.position.clone(); - module_name = module.value.clone(); - }, - Pragma::Version(version) => { - if module_version.is_some() { - return Err( - ParseError::new_error(&source, version.position, "Double definition 
of module version") - .with_postfixed_info(&source, module_version_pos, "Previous definition was here") - ) - } - - module_version_pos = version.position.clone(); - module_version = Some(version.version); - }, - } - } - - // Add module to list of modules and prevent naming conflicts - let cur_module_idx = self.modules.len(); - if let Some(prev_module_idx) = self.module_lookup.get(&module_name) { - // Find `#module` statement in other module again - let prev_module = &self.modules[*prev_module_idx]; - let prev_module_pos = self.heap[prev_module.root_id].pragmas - .iter() - .find_map(|p| { - match &self.heap[*p] { - Pragma::Module(module) => Some(module.position.clone()), - _ => None - } - }) - .unwrap_or(InputPosition::default()); - - let module_name_msg = if module_name.is_empty() { - format!("a nameless module") - } else { - format!("module '{}'", String::from_utf8_lossy(&module_name)) - }; - - return Err( - ParseError::new_error(&source, module_name_pos, &format!("Double definition of {} across files", module_name_msg)) - .with_postfixed_info(&prev_module.source, prev_module_pos, "Other definition was here") - ); - } + pub fn feed(&mut self, mut source: InputSource) -> Result<(), ParseError> { + // TODO: @Optimize + let mut token_buffer = TokenBuffer::new(); + self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?; - self.modules.push(LexedModule{ + let module = Module{ source, - module_name: module_name.clone(), - version: module_version, - root_id: pd - }); - self.module_lookup.insert(module_name, cur_module_idx); - Ok(pd) - } - - fn resolve_symbols_and_types(&mut self) -> Result<(), ParseError> { - // Construct the symbol table to resolve any imports and/or definitions, - // then use the symbol table to actually annotate all of the imports. - // If the type table is constructed correctly then all imports MUST be - // resolvable. - self.symbol_table.build(&self.heap, &self.modules)?; - - // Not pretty, but we need to work around rust's borrowing rules, it is - // totally safe to mutate the contents of an AST element that we are - // not borrowing anywhere else. 
- let mut module_index = 0; - let mut import_index = 0; - loop { - if module_index >= self.modules.len() { - break; - } - - let module_root_id = self.modules[module_index].root_id; - let import_id = { - let root = &self.heap[module_root_id]; - if import_index >= root.imports.len() { - module_index += 1; - import_index = 0; - continue - } - root.imports[import_index] - }; + tokens: token_buffer, + root_id: RootId::new_invalid(), + name: None, + version: None, + phase: ModuleCompilationPhase::Tokenized, + }; + self.modules.push(module); - let import = &mut self.heap[import_id]; - match import { - Import::Module(import) => { - debug_assert!(import.module_id.is_none(), "module import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module) - .expect("module import is resolved by symbol table"); - import.module_id = Some(target_module_id) - }, - Import::Symbols(import) => { - debug_assert!(import.module_id.is_none(), "module of symbol import already resolved"); - let target_module_id = self.symbol_table.resolve_module(&import.module) - .expect("symbol import's module is resolved by symbol table"); - import.module_id = Some(target_module_id); + Ok(()) + } - for symbol in &mut import.symbols { - debug_assert!(symbol.definition_id.is_none(), "symbol import already resolved"); - let (_, target_definition_id) = self.symbol_table.resolve_identifier(module_root_id, &symbol.alias) - .expect("symbol import is resolved by symbol table") - .as_definition() - .expect("symbol import does not resolve to namespace symbol"); - symbol.definition_id = Some(target_definition_id); - } - } - } + pub fn parse(&mut self) -> Result<(), ParseError> { + let mut pass_ctx = PassCtx{ + heap: &mut self.heap, + symbols: &mut self.symbol_table, + pool: &mut self.string_pool, + }; - import_index += 1; + // Advance all modules to the phase where all symbols are scanned + for module_idx in 0..self.modules.len() { + self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?; } - // All imports in the AST are now annotated. We now use the symbol table - // to construct the type table. - let mut type_ctx = TypeCtx::new(&self.symbol_table, &mut self.heap, &self.modules); - self.type_table.build_base_types(&mut type_ctx)?; - - Ok(()) - } + // With all symbols scanned, perform further compilation until we can + // add all base types to the type table. 
+ for module_idx in 0..self.modules.len() { + self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?; + self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?; + } - pub fn parse(&mut self) -> Result<(), ParseError> { - self.resolve_symbols_and_types()?; + // Add every known type to the type table + self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?; - // Validate and link all modules - let mut visit = PassValidationLinking::new(); - for module in &self.modules { + // Continue compilation with the remaining phases now that the types + // are all in the type table + for module_idx in 0..self.modules.len() { let mut ctx = visitor::Ctx{ heap: &mut self.heap, - module, + module: &self.modules[module_idx], symbols: &mut self.symbol_table, types: &mut self.type_table, }; - visit.visit_module(&mut ctx)?; + self.pass_validation.visit_module(&mut ctx)?; } // Perform typechecking on all modules - let mut visit = TypeResolvingVisitor::new(); let mut queue = ResolveQueue::new(); for module in &self.modules { let ctx = visitor::Ctx{ @@ -250,7 +151,7 @@ impl Parser { symbols: &mut self.symbol_table, types: &mut self.type_table, }; - TypeResolvingVisitor::queue_module_definitions(&ctx, &mut queue); + PassTyping::queue_module_definitions(&ctx, &mut queue); }; while !queue.is_empty() { let top = queue.pop().unwrap(); @@ -260,7 +161,7 @@ impl Parser { symbols: &mut self.symbol_table, types: &mut self.type_table, }; - visit.handle_module_definition(&mut ctx, &mut queue, top)?; + self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?; } // Perform remaining steps @@ -268,7 +169,7 @@ impl Parser { for module in &self.modules { let root_id = module.root_id; if let Err((position, message)) = Self::parse_inner(&mut self.heap, root_id) { - return Err(ParseError::new_error(&self.modules[0].source, position, &message)) + return Err(ParseError::new_error_str_at_pos(&self.modules[0].source, position, &message)) } } diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index bc60f3b48c7cb48e1d3b7cef1aca40cdf1d1eda4..9bc32f1e501abf47eb6c86f359c5a42b21b683bb 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -1,9 +1,9 @@ use crate::protocol::ast::*; -use super::symbol_table2::*; +use super::symbol_table::*; use super::{Module, ModuleCompilationPhase, PassCtx}; use super::tokens::*; use super::token_parsing::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as InputPosition, InputSpan, ParseError}; +use crate::protocol::input_source::{InputSource as InputSource, InputPosition as InputPosition, InputSpan, ParseError}; use crate::collections::*; /// Parses all the tokenized definitions into actual AST nodes. 
@@ -12,7 +12,6 @@ pub(crate) struct PassDefinitions { cur_definition: DefinitionId, // Temporary buffers of various kinds buffer: String, - identifiers: Vec, struct_fields: Vec, enum_variants: Vec, union_variants: Vec, @@ -23,6 +22,20 @@ pub(crate) struct PassDefinitions { } impl PassDefinitions { + pub(crate) fn new() -> Self { + Self{ + cur_definition: DefinitionId::new_invalid(), + buffer: String::with_capacity(128), + struct_fields: Vec::with_capacity(128), + enum_variants: Vec::with_capacity(128), + union_variants: Vec::with_capacity(128), + parameters: ScopedBuffer::new_reserved(128), + expressions: ScopedBuffer::new_reserved(128), + statements: ScopedBuffer::new_reserved(128), + parser_types: Vec::with_capacity(128), + } + } + pub(crate) fn parse(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx) -> Result<(), ParseError> { let module = &modules[module_idx]; let module_range = &module.tokens.ranges[0]; @@ -51,7 +64,7 @@ impl PassDefinitions { } } - + modules[module_idx].phase = ModuleCompilationPhase::DefinitionsParsed; Ok(()) } @@ -99,10 +112,10 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); // Parse struct definition - consume_polymorphic_vars_spilled(source, iter)?; + consume_polymorphic_vars_spilled(&module.source, iter)?; debug_assert!(self.struct_fields.is_empty()); consume_comma_separated( - TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, |source, iter| { let start_pos = iter.last_valid_pos(); let parser_type = consume_parser_type( @@ -139,10 +152,10 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); // Parse enum definition - consume_polymorphic_vars_spilled(source, iter)?; + consume_polymorphic_vars_spilled(&module.source, iter)?; debug_assert!(self.enum_variants.is_empty()); consume_comma_separated( - TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, |source, iter| { let identifier = consume_ident_interned(source, iter, ctx)?; let value = if iter.next() == Some(TokenKind::Equal) { @@ -178,10 +191,10 @@ impl PassDefinitions { let poly_vars = ctx.heap[definition_id].poly_vars(); // Parse union definition - consume_polymorphic_vars_spilled(source, iter)?; + consume_polymorphic_vars_spilled(&module.source, iter)?; debug_assert!(self.union_variants.is_empty()); consume_comma_separated( - TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, |source, iter| { let identifier = consume_ident_interned(source, iter, ctx)?; let mut close_pos = identifier.span.end; @@ -234,7 +247,7 @@ impl PassDefinitions { // Parse function's argument list let mut parameter_section = self.parameters.start_section(); consume_parameter_list( - source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id + &module.source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id )?; let parameters = parameter_section.into_vec(); @@ -273,7 +286,7 @@ impl PassDefinitions { &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result<(), ParseError> { let (_variant_text, _) = consume_any_ident(&module.source, iter)?; - debug_assert!(variant_text == KW_PRIMITIVE || variant_text == KW_COMPOSITE); + debug_assert!(_variant_text == KW_PRIMITIVE || _variant_text == KW_COMPOSITE); let (ident_text, _) = consume_ident(&module.source, iter)?; // Retrieve 
preallocated definition @@ -285,7 +298,7 @@ impl PassDefinitions { // Parse component's argument list let mut parameter_section = self.parameters.start_section(); consume_parameter_list( - source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id + &module.source, iter, ctx, &mut parameter_section, poly_vars, module_scope, definition_id )?; let parameters = parameter_section.into_vec(); @@ -319,7 +332,7 @@ impl PassDefinitions { debug_assert_eq!(statements.len(), 1); let statements = statements.into_vec(); - ctx.heap.alloc_block_statement(|this| BlockStatement{ + Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{ this, is_implicit: true, span: InputSpan::from_positions(wrap_begin_pos, wrap_end_pos), // TODO: @Span @@ -328,7 +341,7 @@ impl PassDefinitions { relative_pos_in_parent: 0, locals: Vec::new(), labels: Vec::new() - }) + })) } } @@ -343,7 +356,7 @@ impl PassDefinitions { let id = self.consume_block_statement(module, iter, ctx)?; section.push(id.upcast()); } else if next == TokenKind::Ident { - let (ident, _) = consume_any_ident(source, iter)?; + let (ident, _) = consume_any_ident(&module.source, iter)?; if ident == KW_STMT_IF { // Consume if statement and place end-if statement directly // after it. @@ -418,7 +431,7 @@ impl PassDefinitions { fn consume_block_statement( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result { - let open_span = consume_token(source, iter, TokenKind::OpenCurly)?; + let open_span = consume_token(&module.source, iter, TokenKind::OpenCurly)?; self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin) } @@ -455,7 +468,7 @@ impl PassDefinitions { consume_token(&module.source, iter, TokenKind::CloseParen)?; let true_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; - let false_body = if has_ident(source, iter, KW_STMT_ELSE) { + let false_body = if has_ident(&module.source, iter, KW_STMT_ELSE) { iter.consume(); let false_body = self.consume_block_or_wrapped_statement(module, iter, ctx)?; Some(false_body) @@ -594,7 +607,7 @@ impl PassDefinitions { let expression = &ctx.heap[expression_id]; let mut valid = false; - let mut call_id = CallExpressionId.new_invalid(); + let mut call_id = CallExpressionId::new_invalid(); if let Expression::Call(expression) = expression { // Allow both components and functions, as it makes more sense to // check their correct use in the validation and linking pass @@ -606,8 +619,7 @@ impl PassDefinitions { if !valid { return Err(ParseError::new_error_str_at_span( - source, InputSpan::from_positions(start_pos, iter.last_valid_pos()), - "expected a call expression" + &module.source, InputSpan::from_positions(start_pos, iter.last_valid_pos()), "expected a call expression" )); } consume_token(&module.source, iter, TokenKind::SemiColon)?; @@ -626,14 +638,16 @@ impl PassDefinitions { ) -> Result { // Consume channel specification let channel_span = consume_exact_ident(&module.source, iter, KW_STMT_CHANNEL)?; - let channel_type = if Some(TokenKind::OpenAngle) = iter.next() { + let channel_type = if Some(TokenKind::OpenAngle) == iter.next() { // Retrieve the type of the channel, we're cheating a bit here by // consuming the first '<' and setting the initial angle depth to 1 // such that our final '>' will be consumed as well. 
iter.consume(); + let definition_id = self.cur_definition; + let poly_vars = ctx.heap[definition_id].poly_vars(); consume_parser_type( &module.source, iter, &ctx.symbols, &ctx.heap, - poly_vars, SymbolScope::Module(module.root_id), definition_id, + &poly_vars, SymbolScope::Module(module.root_id), definition_id, true, 1 )? } else { @@ -690,7 +704,7 @@ impl PassDefinitions { let stmt_id = ctx.heap.alloc_labeled_statement(|this| LabeledStatement { this, label, - body: *inner_section[0], + body: inner_section[0], relative_pos_in_block: 0, in_sync: None, }); @@ -870,7 +884,7 @@ impl PassDefinitions { let test = result; let true_expression = self.consume_expression(module, iter, ctx)?; - consume_token(source, iter, TokenKind::Colon)?; + consume_token(&module.source, iter, TokenKind::Colon)?; let false_expression = self.consume_expression(module, iter, ctx)?; Ok(ctx.heap.alloc_conditional_expression(|this| ConditionalExpression{ this, span, test, true_expression, false_expression, @@ -1019,7 +1033,7 @@ impl PassDefinitions { } fn consume_multiply_divide_or_modulus_expression( - &mut self, module: &Module, iter: &mut Tokeniter, ctx: &mut PassCtx + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result { self.consume_generic_binary_expression( module, iter, ctx, @@ -1036,7 +1050,7 @@ impl PassDefinitions { fn consume_prefix_expression( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result { - fn parse_prefix_token(token: Option) -> Some(UnaryOperation) { + fn parse_prefix_token(token: Option) -> Option { use TokenKind as TK; use UnaryOperation as UO; match token { @@ -1107,7 +1121,7 @@ impl PassDefinitions { // Check if we have an indexing or slicing operation next = iter.next(); - if Some(TokenKind::DotDot) = next { + if Some(TokenKind::DotDot) == next { iter.consume(); let to_index = self.consume_expression(module, iter, ctx)?; @@ -1119,7 +1133,7 @@ impl PassDefinitions { parent: ExpressionParent::None, concrete_type: ConcreteType::default() }).upcast(); - } else if Some(TokenKind::CloseSquare) { + } else if Some(TokenKind::CloseSquare) == next { let end_span = consume_token(&module.source, iter, TokenKind::CloseSquare)?; span.end = end_span.end; @@ -1143,7 +1157,7 @@ impl PassDefinitions { } else { let value = ctx.pool.intern(field_text); let identifier = Identifier{ value, span: field_span }; - Field::Symbolic(FieldSymbolic{ identifier, definition: None, field_idx: 0 }); + Field::Symbolic(FieldSymbolic{ identifier, definition: None, field_idx: 0 }) }; result = ctx.heap.alloc_select_expression(|this| SelectExpression{ @@ -1396,12 +1410,16 @@ impl PassDefinitions { ctx.heap.alloc_variable_expression(|this| VariableExpression { this, identifier, - declaration: NJone, + declaration: None, parent: ExpressionParent::None, concrete_type: ConcreteType::default() }).upcast() } } + } else { + return Err(ParseError::new_error_str_at_pos( + &module.source, iter.last_valid_pos(), "expected an expression" + )); }; Ok(result) @@ -1504,7 +1522,7 @@ fn consume_parser_type( // Start out with the first '<' consumed. 
iter.consume(); - enum State { Ident, Open, Close, Comma }; + enum State { Ident, Open, Close, Comma } let mut state = State::Open; let mut angle_depth = first_angle_depth + 1; diff --git a/src/protocol/parser/pass_imports.rs b/src/protocol/parser/pass_imports.rs index 9e76cabeb22181a21e7bfa30779446ea8ce86c05..00c833d2299b5fce7aa9eeb342875571483f6574 100644 --- a/src/protocol/parser/pass_imports.rs +++ b/src/protocol/parser/pass_imports.rs @@ -1,9 +1,9 @@ use crate::protocol::ast::*; -use super::symbol_table2::*; +use super::symbol_table::*; use super::{Module, ModuleCompilationPhase, PassCtx}; use super::tokens::*; use super::token_parsing::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, InputSpan, ParseError}; +use crate::protocol::input_source::{InputSource as InputSource, InputSpan, ParseError}; use crate::collections::*; /// Parses all the imports in the module tokens. Is applied after the @@ -117,12 +117,12 @@ impl PassImport { // Consume symbol name and make sure it points to an existing definition let symbol_identifier = consume_ident_interned(source, iter, ctx)?; let target = ctx.symbols.get_symbol_by_name_defined_in_scope( - SymbolScope::Module(module_root_id), symbol + SymbolScope::Module(module_root_id), symbol_identifier.value.as_bytes() ); if target.is_none() { return Err(ParseError::new_error_at_span( - source, symbol_span, + source, symbol_identifier.span, format!( "could not find symbol '{}' within module '{}'", symbol_identifier.value.as_str(), module_name.as_str() @@ -154,7 +154,7 @@ impl PassImport { let next = iter.next(); - if Some(TokenKind::Ident) = next { + if Some(TokenKind::Ident) == next { // Importing a single symbol iter.consume(); let (imported_symbol, symbol_definition) = consume_symbol_and_maybe_alias( @@ -179,11 +179,11 @@ impl PassImport { modules, module_idx, ctx, &new_symbol, old_symbol )); } - } else if Some(TokenKind::OpenCurly) = next { + } else if Some(TokenKind::OpenCurly) == next { // Importing multiple symbols let mut end_of_list = iter.last_valid_pos(); consume_comma_separated( - TokenKind::OpenCurly, TokenKind::CloseCurly, source, &mut iter, + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, &mut iter, |source, iter| consume_symbol_and_maybe_alias( source, iter, ctx, &module_identifier.value, target_root_id ), @@ -218,7 +218,7 @@ impl PassImport { return Err(construct_symbol_conflict_error(modules, module_idx, ctx, &new_symbol, old_symbol)); } } - } else if Some(TokenKind::Star) = next { + } else if Some(TokenKind::Star) == next { // Import all symbols from the module let star_span = iter.next_span(); @@ -273,7 +273,7 @@ impl PassImport { } else { // Assume implicit alias let module_name_str = module_identifier.value.clone(); - let last_ident_start = module_name_str.rfind('.').map_or(0, |v| v + 1); + let last_ident_start = module_name_str.as_str().rfind('.').map_or(0, |v| v + 1); let alias_text = &module_name_str.as_bytes()[last_ident_start..]; let alias = ctx.pool.intern(alias_text); let alias_span = InputSpan::from_positions( diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs index 3210f1b578465bbc046d758b649bae82aa6c3cb5..c4cd05cf89a14395488dc57f37390a64cfe4e42e 100644 --- a/src/protocol/parser/pass_symbols.rs +++ b/src/protocol/parser/pass_symbols.rs @@ -1,6 +1,6 @@ use crate::protocol::ast::*; -use super::symbol_table2::*; -use crate::protocol::input_source2::{ParseError, InputSpan}; +use super::symbol_table::*; +use crate::protocol::input_source::{ParseError, 
InputSpan}; use super::tokens::*; use super::token_parsing::*; use super::{Module, ModuleCompilationPhase, PassCtx}; @@ -105,19 +105,19 @@ impl PassSymbols { let mut iter = module.tokens.iter_range(range); // Consume pragma name - let (pragma_section, pragma_start, _) = consume_pragma(&self.source, &mut iter)?; + let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?; // Consume pragma values - if pragma_section == "#module" { + if pragma_section == b"#module" { // Check if name is defined twice within the same file if self.has_pragma_module { - return Err(ParseError::new_error(&module.source, pragma_start, "module name is defined twice")); + return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice")); } // Consume the domain-name let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; if iter.next().is_some() { - return Err(ParseError::new_error(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); + return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); } // Add to heap and symbol table @@ -143,10 +143,10 @@ impl PassSymbols { )); } self.has_pragma_module = true; - } else if pragma_section == "#version" { + } else if pragma_section == b"#version" { // Check if version is defined twice within the same file if self.has_pragma_version { - return Err(ParseError::new_error(&module.source, pragma_start, "module version is defined twice")); + return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module version is defined twice")); } // Consume the version pragma @@ -161,7 +161,7 @@ impl PassSymbols { } else { // Custom pragma, maybe we support this in the future, but for now // we don't. 
- return Err(ParseError::new_error(&module.source, pragma_start, "illegal pragma name")); + return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "illegal pragma name")); } Ok(()) @@ -188,6 +188,7 @@ impl PassSymbols { &mut poly_vars, "a polymorphic variable", None )?; let ident_text = identifier.value.clone(); // because we need it later + let ident_span = identifier.span.clone(); // Reserve space in AST for definition and add it to the symbol table let definition_class; @@ -195,28 +196,28 @@ impl PassSymbols { match kw_text { KW_STRUCT => { let struct_def_id = ctx.heap.alloc_struct_definition(|this| { - StructDefinition::new_empty(this, definition_span, identifier, poly_vars) + StructDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Struct; ast_definition_id = struct_def_id.upcast(); }, KW_ENUM => { let enum_def_id = ctx.heap.alloc_enum_definition(|this| { - EnumDefinition::new_empty(this, definition_span, identifier, poly_vars) + EnumDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Enum; ast_definition_id = enum_def_id.upcast(); }, KW_UNION => { let union_def_id = ctx.heap.alloc_union_definition(|this| { - UnionDefinition::new_empty(this, definition_span, identifier, poly_vars) + UnionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Union; ast_definition_id = union_def_id.upcast() }, KW_FUNCTION => { let func_def_id = ctx.heap.alloc_function_definition(|this| { - FunctionDefinition::new_empty(this, definition_span, identifier, poly_vars) + FunctionDefinition::new_empty(this, module.root_id, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Function; ast_definition_id = func_def_id.upcast(); @@ -228,7 +229,7 @@ impl PassSymbols { ComponentVariant::Composite }; let comp_def_id = ctx.heap.alloc_component_definition(|this| { - ComponentDefinition::new_empty(this, definition_span, component_variant, identifier, poly_vars) + ComponentDefinition::new_empty(this, module.root_id, definition_span, component_variant, identifier, poly_vars) }); definition_class = DefinitionClass::Component; ast_definition_id = comp_def_id.upcast(); @@ -242,7 +243,7 @@ impl PassSymbols { defined_in_module: module.root_id, defined_in_scope: SymbolScope::Module(module.root_id), definition_span, - identifier_span, + identifier_span: ident_span, imported_at: None, class: definition_class, definition_id: ast_definition_id, diff --git a/src/protocol/parser/pass_tokenizer.rs b/src/protocol/parser/pass_tokenizer.rs index 288fb9ecc4b1e597a23440610a10b02f2d5dd940..a71cbfef685bc89dc427e87410acb5a8995fc707 100644 --- a/src/protocol/parser/pass_tokenizer.rs +++ b/src/protocol/parser/pass_tokenizer.rs @@ -1,7 +1,7 @@ -use crate::protocol::input_source2::{ - InputSource2 as InputSource, +use crate::protocol::input_source::{ + InputSource as InputSource, ParseError, - InputPosition2 as InputPosition, + InputPosition as InputPosition, InputSpan }; @@ -43,7 +43,6 @@ impl PassTokenizer { // Set up for tokenization by pushing the first range onto the stack. // This range may get transformed into the appropriate range kind later, // see `push_range` and `pop_range`. 
- self.curly_depth = 0; self.stack_idx = 0; target.ranges.push(TokenRange{ parent_idx: 0, @@ -101,7 +100,7 @@ impl PassTokenizer { // Check if this marks the end of a range we're // currently processing if self.curly_stack.is_empty() { - return Err(ParseError::new_error( + return Err(ParseError::new_error_str_at_pos( source, token_pos, "unmatched closing curly brace '}'" )); } @@ -109,7 +108,7 @@ impl PassTokenizer { self.curly_stack.pop(); let range = &target.ranges[self.stack_idx]; - if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_depth { + if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 { self.pop_range(target, target.tokens.len() as u32); } @@ -123,7 +122,9 @@ impl PassTokenizer { } } } else { - return Err(ParseError::new_error(source, source.pos(), "unexpected character")); + return Err(ParseError::new_error_str_at_pos( + source, source.pos(), "unexpected character" + )); } } } @@ -137,7 +138,7 @@ impl PassTokenizer { // Let's not add a lot of heuristics and just tell the programmer // that something is wrong let last_unmatched_open = self.curly_stack.pop().unwrap(); - return Err(ParseError::new_error( + return Err(ParseError::new_error_str_at_pos( source, last_unmatched_open, "unmatched opening curly brace '{'" )); } @@ -320,15 +321,15 @@ impl PassTokenizer { } else if first_char == b'>' { source.consume(); let next = source.next(); - if let Some(b'>') = next { + if Some(b'>') == next { source.consume(); - if let Some(b'=') = source.next() { + if Some(b'=') == source.next() { source.consume(); token_kind = TokenKind::ShiftRightEquals; } else { token_kind = TokenKind::ShiftRight; } - } else if Some(b'=') = next { + } else if Some(b'=') == next { source.consume(); token_kind = TokenKind::GreaterEquals; } else { @@ -391,7 +392,7 @@ impl PassTokenizer { let mut prev_char = b'\''; while let Some(c) = source.next() { if !c.is_ascii() { - return Err(ParseError::new_error(source, source.pos(), "non-ASCII character in char literal")); + return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in char literal")); } source.consume(); @@ -406,7 +407,7 @@ impl PassTokenizer { if prev_char != b'\'' { // Unterminated character literal, reached end of file. 
- return Err(ParseError::new_error(source, begin_pos, "encountered unterminated character literal")); + return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated character literal")); } let end_pos = source.pos(); @@ -427,7 +428,7 @@ impl PassTokenizer { let mut prev_char = b'"'; while let Some(c) = source.next() { if !c.is_ascii() { - return Err(ParseError::new_error(source, source.pos(), "non-ASCII character in string literal")); + return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal")); } source.consume(); @@ -441,7 +442,7 @@ impl PassTokenizer { if prev_char != b'"' { // Unterminated string literal - return Err(ParseError::new_error(source, begin_pos, "encountered unterminated string literal")); + return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal")); } let end_pos = source.pos(); @@ -548,7 +549,9 @@ impl PassTokenizer { } if !is_closed { - return Err(ParseError::new_error(source, source.pos(), "encountered unterminated block comment")); + return Err(ParseError::new_error_str_at_pos( + source, source.pos(), "encountered unterminated block comment") + ); } let end_pos = source.pos(); @@ -646,7 +649,7 @@ impl PassTokenizer { target.ranges.push(TokenRange{ parent_idx: self.stack_idx, range_kind: TokenRangeKind::Code, - curly_depth: self.curly_depth, + curly_depth: self.curly_stack.len() as u32, start: code_start, end: first_token, num_child_ranges: 0, @@ -672,7 +675,7 @@ impl PassTokenizer { target.ranges.push(TokenRange{ parent_idx, range_kind, - curly_depth: self.curly_depth, + curly_depth: self.curly_stack.len() as u32, start: first_token, end: first_token, num_child_ranges: 0, @@ -701,7 +704,7 @@ impl PassTokenizer { fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> { match source.next() { Some(c) if !c.is_ascii() => { - Err(ParseError::new_error(source, source.pos(), "encountered a non-ASCII character")) + Err(ParseError::new_error_str_at_pos(source, source.pos(), "encountered a non-ASCII character")) }, _else => { Ok(()) diff --git a/src/protocol/parser/type_resolver.rs b/src/protocol/parser/pass_typing.rs similarity index 97% rename from src/protocol/parser/type_resolver.rs rename to src/protocol/parser/pass_typing.rs index 88113d78c3296b6f75793ab6369f5e2360a46d46..2f486ae045906454008c3ca1c30052576a06c510 100644 --- a/src/protocol/parser/type_resolver.rs +++ b/src/protocol/parser/pass_typing.rs @@ -1,4 +1,4 @@ -/// type_resolver.rs +/// pass_typing /// /// Performs type inference and type checking. Type inference is implemented by /// applying constraints on (sub)trees of types. During this process the @@ -54,10 +54,11 @@ macro_rules! 
debug_log { }; } -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, HashSet}; use crate::protocol::ast::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError}; +use crate::protocol::input_source::ParseError; +use crate::protocol::parser::ModuleCompilationPhase; use crate::protocol::parser::type_table::*; use super::visitor::{ STMT_BUFFER_INIT_CAPACITY, @@ -68,7 +69,7 @@ use super::visitor::{ }; use std::collections::hash_map::Entry; -const MESSAGE_TEMPLATE: [InferenceTypePart; 2] = [ InferenceTypePart::Message, InferenceTypePart::Byte ]; +const MESSAGE_TEMPLATE: [InferenceTypePart; 2] = [ InferenceTypePart::Message, InferenceTypePart::UInt8 ]; const BOOL_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::Bool ]; const CHARACTER_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::Character ]; const STRING_TEMPLATE: [InferenceTypePart; 1] = [ InferenceTypePart::String ]; @@ -833,7 +834,7 @@ pub(crate) type ResolveQueue = Vec; /// This particular visitor will recurse depth-first into the AST and ensures /// that all expressions have the appropriate types. -pub(crate) struct TypeResolvingVisitor { +pub(crate) struct PassTyping { // Current definition we're typechecking. definition_type: DefinitionType, poly_vars: Vec, @@ -880,9 +881,9 @@ impl VarData { } } -impl TypeResolvingVisitor { +impl PassTyping { pub(crate) fn new() -> Self { - TypeResolvingVisitor{ + PassTyping { definition_type: DefinitionType::None, poly_vars: Vec::new(), stmt_buffer: Vec::with_capacity(STMT_BUFFER_INIT_CAPACITY), @@ -897,6 +898,7 @@ impl TypeResolvingVisitor { // TODO: @cleanup Unsure about this, maybe a pattern will arise after // a while. pub(crate) fn queue_module_definitions(ctx: &Ctx, queue: &mut ResolveQueue) { + debug_assert_eq!(ctx.module.phase, ModuleCompilationPhase::ValidatedAndLinked); let root_id = ctx.module.root_id; let root = &ctx.heap.protocol_descriptions[root_id]; for definition_id in &root.definitions { @@ -952,7 +954,7 @@ impl TypeResolvingVisitor { } } -impl Visitor2 for TypeResolvingVisitor { +impl Visitor2 for PassTyping { // Definitions fn visit_component_definition(&mut self, ctx: &mut Ctx, id: ComponentDefinitionId) -> VisitorResult { @@ -1309,7 +1311,7 @@ macro_rules! 
debug_assert_ptrs_distinct { }; } -impl TypeResolvingVisitor { +impl PassTyping { fn resolve_types(&mut self, ctx: &mut Ctx, queue: &mut ResolveQueue) -> Result<(), ParseError> { // Keep inferring until we can no longer make any progress while let Some(next_expr_id) = self.expr_queued.iter().next() { @@ -3031,9 +3033,9 @@ impl TypeResolvingVisitor { debug_assert_eq!(variant_definition.embedded.len(), literal.values.len()); let mut embedded = Vec::with_capacity(variant_definition.embedded.len()); - for embedded_id in &variant_definition.embedded { + for embedded_parser_type in &variant_definition.embedded { let inference_type = self.determine_inference_type_from_parser_type( - ctx, *embedded_id, false + ctx, embedded_parser_type, false ); embedded.push(inference_type); } @@ -3163,7 +3165,7 @@ impl TypeResolvingVisitor { infer_type.push(ITP::MarkerDefinition(poly_arg_idx as usize)); for concrete_part in &self.poly_vars[poly_arg_idx].parts { - infer_types.push(ITP::from(*concrete_part)); + infer_type.push(ITP::from(*concrete_part)); } } else { // Polymorphic argument has to be inferred @@ -3289,62 +3291,21 @@ impl TypeResolvingVisitor { } // Helpers function to retrieve polyvar name and definition name - fn get_poly_var_and_func_name(ctx: &Ctx, poly_var_idx: usize, expr: &CallExpression) -> (String, String) { - match &expr.method { - Method::Create => unreachable!(), - Method::Fires => (String::from('T'), String::from("fires")), - Method::Get => (String::from('T'), String::from("get")), - Method::Put => (String::from('T'), String::from("put")), - Method::Symbolic(symbolic) => { - let definition = &ctx.heap[symbolic.definition.unwrap()]; - let poly_var = match definition { - Definition::Struct(_) | Definition::Enum(_) | Definition::Union(_) => unreachable!(), - Definition::Function(definition) => { - String::from_utf8_lossy(&definition.poly_vars[poly_var_idx].value).to_string() - }, - Definition::Component(definition) => { - String::from_utf8_lossy(&definition.poly_vars[poly_var_idx].value).to_string() - } - }; - let func_name = String::from_utf8_lossy(&symbolic.identifier.value).to_string(); - (poly_var, func_name) - } - } - } - - fn get_poly_var_and_type_name(ctx: &Ctx, poly_var_idx: usize, definition_id: DefinitionId) -> (String, String) { + fn get_poly_var_and_definition_name<'a>(ctx: &'a Ctx, poly_var_idx: usize, definition_id: DefinitionId) -> (&'a str, &'a str) { let definition = &ctx.heap[definition_id]; - let (poly_var_name, type_name) = match definition { - Definition::Function(_) | Definition::Component(_) => - unreachable!("get_poly_var_and_type_name called on unsupported type"), - Definition::Enum(definition) => ( - &definition.poly_vars[poly_var_idx].value, - &definition.identifier.value - ), - Definition::Struct(definition) => ( - &definition.poly_vars[poly_var_idx].value, - &definition.identifier.value - ), - Definition::Union(definition) => ( - &definition.poly_vars[poly_var_idx].value, - &definition.identifier.value - ), - }; + let poly_var = definition.poly_vars()[poly_var_idx].value.as_str(); + let func_name = definition.identifier().value.as_str(); - ( - String::from_utf8_lossy(poly_var_name).to_string(), - String::from_utf8_lossy(type_name).to_string() - ) + (poly_var, func_name) } // Helper function to construct initial error fn construct_main_error(ctx: &Ctx, poly_var_idx: usize, expr: &Expression) -> ParseError { match expr { Expression::Call(expr) => { - let (poly_var, func_name) = get_poly_var_and_func_name(ctx, poly_var_idx, expr); - return ParseError::new_error( - 
&ctx.module.source, expr.position(), - &format!( + let (poly_var, func_name) = get_poly_var_and_definition_name(ctx, poly_var_idx, expr.definition); + return ParseError::new_error_at_span( + &ctx.module.source, expr.span, format!( "Conflicting type for polymorphic variable '{}' of '{}'", poly_var, func_name ) @@ -3358,27 +3319,25 @@ impl TypeResolvingVisitor { _ => unreachable!(), }; - let (poly_var, struct_name) = get_poly_var_and_type_name(ctx, poly_var_idx, definition_id); - return ParseError::new_error( - &ctx.module.source, expr.position(), - &format!( + let (poly_var, type_name) = get_poly_var_and_definition_name(ctx, poly_var_idx, definition_id); + return ParseError::new_error_at_span( + &ctx.module.source, expr.span, format!( "Conflicting type for polymorphic variable '{}' of instantiation of '{}'", - poly_var, struct_name + poly_var, type_name ) - ) + ); }, Expression::Select(expr) => { let field = expr.field.as_symbolic(); - let (poly_var, struct_name) = get_poly_var_and_type_name(ctx, poly_var_idx, field.definition.unwrap()); - return ParseError::new_error( - &ctx.module.source, expr.position(), - &format!( + let (poly_var, struct_name) = get_poly_var_and_definition_name(ctx, poly_var_idx, field.definition.unwrap()); + return ParseError::new_error_at_span( + &ctx.module.source, expr.position(), format!( "Conflicting type for polymorphic variable '{}' while accessing field '{}' of '{}'", - poly_var, &String::from_utf8_lossy(&field.identifier.value), struct_name + poly_var, field.identifier.value.as_str(), struct_name ) ) } - _ => unreachable!("called construct_poly_arg_error without a call/literal expression") + _ => unreachable!("called construct_poly_arg_error without an expected expression, got: {:?}", expr) } } @@ -3417,15 +3376,14 @@ impl TypeResolvingVisitor { &poly_data.returned, &poly_data.returned ) { return construct_main_error(ctx, poly_idx, expr) - .with_postfixed_info( - &ctx.module.source, expr.position(), - &format!( + .with_info_at_span( + &ctx.module.source, expr.span(), format!( "The {} inferred the conflicting types '{}' and '{}'", expr_return_name, InferenceType::partial_display_name(&ctx.heap, section_a), InferenceType::partial_display_name(&ctx.heap, section_b) ) - ) + ); } // - check arguments with each other argument and with return type @@ -3440,26 +3398,23 @@ impl TypeResolvingVisitor { if arg_a_idx == arg_b_idx { // Same argument let arg = &ctx.heap[expr_args[arg_a_idx]]; - return error.with_postfixed_info( - &ctx.module.source, arg.position(), - &format!( + return error.with_info_at_span( + &ctx.module.source, arg.span(), format!( "This argument inferred the conflicting types '{}' and '{}'", InferenceType::partial_display_name(&ctx.heap, section_a), InferenceType::partial_display_name(&ctx.heap, section_b) ) - ) + ); } else { let arg_a = &ctx.heap[expr_args[arg_a_idx]]; let arg_b = &ctx.heap[expr_args[arg_b_idx]]; - return error.with_postfixed_info( - &ctx.module.source, arg_a.position(), - &format!( + return error.with_info_at_span( + &ctx.module.source, arg_a.span(), format!( "This argument inferred it to '{}'", InferenceType::partial_display_name(&ctx.heap, section_a) ) - ).with_postfixed_info( - &ctx.module.source, arg_b.position(), - &format!( + ).with_info_at_span( + &ctx.module.source, arg_b.span(), format!( "While this argument inferred it to '{}'", InferenceType::partial_display_name(&ctx.heap, section_b) ) @@ -3472,21 +3427,19 @@ impl TypeResolvingVisitor { if let Some((poly_idx, section_arg, section_ret)) = has_poly_mismatch(arg_a, 
&poly_data.returned) { let arg = &ctx.heap[expr_args[arg_a_idx]]; return construct_main_error(ctx, poly_idx, expr) - .with_postfixed_info( - &ctx.module.source, arg.position(), - &format!( + .with_info_at_span( + &ctx.module.source, arg.span(), format!( "This argument inferred it to '{}'", InferenceType::partial_display_name(&ctx.heap, section_arg) ) ) .with_postfixed_info( - &ctx.module.source, expr.position(), - &format!( + &ctx.module.source, expr.span(), format!( "While the {} inferred it to '{}'", expr_return_name, InferenceType::partial_display_name(&ctx.heap, section_ret) ) - ) + ); } } diff --git a/src/protocol/parser/pass_validation_linking.rs b/src/protocol/parser/pass_validation_linking.rs index d41960c2440260800c6c6833f04f08d3d5f5ecef..2fb9c92a521d6972e6a508f249fb861b62b31fe2 100644 --- a/src/protocol/parser/pass_validation_linking.rs +++ b/src/protocol/parser/pass_validation_linking.rs @@ -1,16 +1,12 @@ use crate::collections::{ScopedBuffer}; use crate::protocol::ast::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, InputSpan, ParseError}; -use crate::protocol::parser::{ - symbol_table2::*, - type_table::*, - utils::*, -}; +use crate::protocol::input_source::*; +use crate::protocol::parser::symbol_table::*; +use crate::protocol::parser::type_table::*; use super::visitor::{ STMT_BUFFER_INIT_CAPACITY, EXPR_BUFFER_INIT_CAPACITY, - TYPE_BUFFER_INIT_CAPACITY, Ctx, Visitor2, VisitorResult @@ -86,7 +82,7 @@ impl PassValidationLinking { in_while: None, cur_scope: None, expr_parent: ExpressionParent::None, - def_type: DefinitionType::None, + def_type: DefinitionType::Function(FunctionDefinitionId::new_invalid()), relative_pos_in_block: 0, statement_buffer: ScopedBuffer::new_reserved(STMT_BUFFER_INIT_CAPACITY), expression_buffer: ScopedBuffer::new_reserved(EXPR_BUFFER_INIT_CAPACITY), @@ -98,10 +94,8 @@ impl PassValidationLinking { self.in_while = None; self.cur_scope = None; self.expr_parent = ExpressionParent::None; - self.def_type = DefinitionType::None; + self.def_type = DefinitionType::Function(FunctionDefinitionId::new_invalid()); self.relative_pos_in_block = 0; - self.statement_buffer.clear(); - self.expression_buffer.clear(); } } @@ -123,8 +117,6 @@ impl Visitor2 for PassValidationLinking { // Visit statements in component body let body_id = ctx.heap[id].body; self.visit_block_stmt(ctx, body_id)?; - - self.check_post_definition_state(); Ok(()) } @@ -139,8 +131,6 @@ impl Visitor2 for PassValidationLinking { // Visit statements in function body let body_id = ctx.heap[id].body; self.visit_block_stmt(ctx, body_id)?; - - self.check_post_definition_state(); Ok(()) } @@ -238,15 +228,15 @@ impl Visitor2 for PassValidationLinking { let cur_sync_span = ctx.heap[id].span; if self.in_sync.is_some() { // Nested synchronous statement - let old_sync_span = &ctx.heap[self.in_sync.unwrap()].span; + let old_sync_span = ctx.heap[self.in_sync.unwrap()].span; return Err(ParseError::new_error_str_at_span( &ctx.module.source, cur_sync_span, "Illegal nested synchronous statement" ).with_info_str_at_span( - &ctx.module.source, old_sync_span.position, "It is nested in this synchronous statement" + &ctx.module.source, old_sync_span, "It is nested in this synchronous statement" )); } - if self.def_type != DefinitionType::Primitive { + if !self.def_type.is_primitive() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, cur_sync_span, "synchronous statements may only be used in primitive components" @@ -264,7 +254,7 @@ impl Visitor2 for PassValidationLinking { fn 
visit_return_stmt(&mut self, ctx: &mut Ctx, id: ReturnStatementId) -> VisitorResult { // Check if "return" occurs within a function let stmt = &ctx.heap[id]; - if self.def_type != DefinitionType::Function { + if !self.def_type.is_function() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, stmt.span, "return statements may only appear in function bodies" @@ -304,7 +294,7 @@ impl Visitor2 for PassValidationLinking { fn visit_new_stmt(&mut self, ctx: &mut Ctx, id: NewStatementId) -> VisitorResult { // Make sure the new statement occurs inside a composite component - if self.def_type != DefinitionType::Composite { + if !self.def_type.is_composite() { let new_stmt = &ctx.heap[id]; return Err(ParseError::new_error_str_at_span( &ctx.module.source, new_stmt.span, @@ -649,7 +639,7 @@ impl Visitor2 for PassValidationLinking { let mut expected_wrapping_new_stmt = false; match &mut call_expr.method { Method::Get => { - if self.def_type != DefinitionType::Primitive { + if !self.def_type.is_primitive() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, call_expr.span, "a call to 'get' may only occur in primitive component definitions" @@ -663,7 +653,7 @@ impl Visitor2 for PassValidationLinking { } }, Method::Put => { - if self.def_type != DefinitionType::Primitive { + if !self.def_type.is_primitive() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, call_expr.span, "a call to 'put' may only occur in primitive component definitions" @@ -677,7 +667,7 @@ impl Visitor2 for PassValidationLinking { } }, Method::Fires => { - if self.def_type != DefinitionType::Primitive { + if !self.def_type.is_primitive() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, call_expr.span, "a call to 'fires' may only occur in primitive component definitions" @@ -693,7 +683,7 @@ impl Visitor2 for PassValidationLinking { Method::Create => {}, Method::Length => {}, Method::Assert => { - if self.def_type == DefinitionType::Function { + if self.def_type.is_function() { return Err(ParseError::new_error_str_at_span( &ctx.module.source, call_expr.span, "assert statement may only occur in components" diff --git a/src/protocol/parser/symbol_table.rs b/src/protocol/parser/symbol_table.rs index 43d7c6d22d7cc859ed1d851d25fc00899012fdaa..3ae3b9c9d8175b41c6bd71ab01a784eacb90430c 100644 --- a/src/protocol/parser/symbol_table.rs +++ b/src/protocol/parser/symbol_table.rs @@ -1,447 +1,324 @@ -// TODO: Maybe allow namespaced-aliased imports. It is currently not possible -// to express the following: -// import Module.Submodule as SubMod -// import SubMod::{Symbol} -// And it is especially not possible to express the following: -// import SubMod::{Symbol} -// import Module.Submodule as SubMod +/// symbol_table.rs +/// +/// The datastructure used to lookup symbols within particular scopes. Scopes +/// may be module-level or definition level, although imports and definitions +/// within definitions are currently not allowed. +/// +/// TODO: Once the compiler has matured, find out ways to optimize to prevent +/// the repeated HashMap lookup. 
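To make the scope-chain behaviour described above concrete: a name is resolved by probing the requested scope first and then following `parent_scope` links until a symbol matches or the chain ends. A simplified, self-contained sketch of that lookup (stand-in types only; the real table stores `Symbol` values keyed by interned `StringRef`s):

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Scope { Global, Module(u32), Definition(u32) }

struct ScopeEntry {
    parent: Option<Scope>,       // None only for the global scope
    symbols: Vec<(String, u32)>, // (name, definition id) pairs
}

struct Table { scopes: HashMap<Scope, ScopeEntry> }

impl Table {
    // Walk from `scope` up through its parents; the first scope owning `name`
    // wins. Each hop is another HashMap lookup, which is the cost the module
    // header above flags as a future optimization target.
    fn lookup(&self, mut scope: Scope, name: &str) -> Option<u32> {
        loop {
            let entry = self.scopes.get(&scope)?;
            if let Some((_, id)) = entry.symbols.iter().find(|(n, _)| n == name) {
                return Some(*id);
            }
            scope = entry.parent?;
        }
    }
}

fn main() {
    let mut scopes = HashMap::new();
    scopes.insert(Scope::Global, ScopeEntry { parent: None, symbols: vec![("bool".into(), 0)] });
    scopes.insert(Scope::Module(1), ScopeEntry { parent: Some(Scope::Global), symbols: vec![("Msg".into(), 7)] });
    let table = Table { scopes };
    assert_eq!(table.lookup(Scope::Module(1), "Msg"), Some(7));   // found locally
    assert_eq!(table.lookup(Scope::Module(1), "bool"), Some(0));  // found via parent scope
    assert_eq!(table.lookup(Scope::Module(1), "missing"), None);  // chain exhausted
}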
+ +use std::collections::HashMap; +use std::collections::hash_map::Entry; + +use crate::protocol::input_source::*; use crate::protocol::ast::*; -use crate::protocol::inputsource::*; +use crate::collections::*; + +const RESERVED_SYMBOLS: usize = 32; + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +pub enum SymbolScope { + Global, + Module(RootId), + Definition(DefinitionId), +} -use std::collections::{HashMap, hash_map::Entry}; -use crate::protocol::parser::LexedModule; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolClass { + Module, + Struct, + Enum, + Union, + Function, + Component +} -#[derive(PartialEq, Eq, Hash)] -struct SymbolKey { - module_id: RootId, - symbol_name: Vec, +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DefinitionClass { + Struct, + Enum, + Union, + Function, + Component, } -impl SymbolKey { - fn from_identifier(module_id: RootId, symbol: &Identifier) -> Self { - Self{ module_id, symbol_name: symbol.value.clone() } +impl DefinitionClass { + fn as_symbol_class(&self) -> SymbolClass { + match self { + DefinitionClass::Struct => SymbolClass::Struct, + DefinitionClass::Enum => SymbolClass::Enum, + DefinitionClass::Union => SymbolClass::Union, + DefinitionClass::Function => SymbolClass::Function, + DefinitionClass::Component => SymbolClass::Component, + } } +} + +struct ScopedSymbols { + scope: SymbolScope, + parent_scope: Option, + child_scopes: Vec, + symbols: Vec, +} - fn from_namespaced_identifier(module_id: RootId, symbol: &NamespacedIdentifier) -> Self { - Self{ module_id, symbol_name: symbol.strip_poly_args() } +impl ScopedSymbols { + fn get_symbol<'a>(&'a self, name: &StringRef) -> Option<&'a Symbol> { + for symbol in self.symbols.iter() { + if symbol.name == *name { + return Some(symbol); + } + } + + None } } -pub(crate) enum Symbol { - Namespace(RootId), - Definition((RootId, DefinitionId)), +#[derive(Debug, Clone)] +pub struct SymbolModule { + pub root_id: RootId, + pub introduced_at: ImportId, } -pub(crate) struct SymbolValue { - // Position is the place where the symbol is introduced to a module (this - // position always corresponds to the module whose RootId is stored in the - // `SymbolKey` associated with this `SymbolValue`). For a definition this - // is the position where the symbol is defined, for an import this is the - // position of the import statement. - pub(crate) position: InputPosition, - pub(crate) symbol: Symbol, +#[derive(Debug, Clone)] +pub struct SymbolDefinition { + // Definition location (not necessarily the place where the symbol + // is introduced, as it may be imported). Builtin symbols will have invalid + // spans and module IDs + pub defined_in_module: RootId, + pub defined_in_scope: SymbolScope, + pub definition_span: InputSpan, // full span of definition + pub identifier_span: InputSpan, // span of just the identifier + // Location where the symbol is introduced in its scope + pub imported_at: Option, + // Definition in the heap, with a utility enum to determine its + // class if the ID is not needed. + pub class: DefinitionClass, + pub definition_id: DefinitionId, } -impl SymbolValue { - pub(crate) fn is_namespace(&self) -> bool { - match &self.symbol { - Symbol::Namespace(_) => true, - _ => false - } +impl SymbolDefinition { + /// Clones the entire data structure, but replaces the `imported_at` field + /// with the supplied `ImportId`. 
+ pub(crate) fn into_imported(mut self, imported_at: ImportId) -> Self { + self.imported_at = Some(imported_at); + self } - pub(crate) fn as_namespace(&self) -> Option { - match &self.symbol { - Symbol::Namespace(root_id) => Some(*root_id), - _ => None, +} + +#[derive(Debug, Clone)] +pub enum SymbolVariant { + Module(SymbolModule), + Definition(SymbolDefinition), +} + +impl SymbolVariant { + /// Returns the span at which the item was introduced. For an imported + /// item (all modules, and imported types) this returns the span of the + /// import. For a defined type this returns the span of the identifier + pub(crate) fn span_of_introduction(&self, heap: &Heap) -> InputSpan { + match self { + SymbolVariant::Module(v) => heap[v.introduced_at].span(), + SymbolVariant::Definition(v) => if let Some(import_id) = v.imported_at { + heap[import_id].span() + } else { + v.identifier_span + }, } } - pub(crate) fn as_definition(&self) -> Option<(RootId, DefinitionId)> { - match &self.symbol { - Symbol::Definition((root_id, definition_id)) => Some((*root_id, *definition_id)), - _ => None, + pub(crate) fn as_module(&self) -> &SymbolModule { + match self { + SymbolVariant::Module(v) => v, + SymbolVariant::Definition(_) => unreachable!("called 'as_module' on {:?}", self), } } -} -/// `SymbolTable` is responsible for two parts of the parsing process: firstly -/// it ensures that there are no clashing symbol definitions within each file, -/// and secondly it will resolve all symbols within a module to their -/// appropriate definitions (in case of enums, functions, etc.) and namespaces -/// (currently only external modules can act as namespaces). If a symbol clashes -/// or if a symbol cannot be resolved this will be an error. -/// -/// Within the compilation process the symbol table is responsible for resolving -/// namespaced identifiers (e.g. Module::Enum::EnumVariant) to the appropriate -/// definition (i.e. not namespaces; as the language has no way to use -/// namespaces except for using them in namespaced identifiers). -pub(crate) struct SymbolTable { - // Lookup from module name (not any aliases) to the root id - module_lookup: HashMap, RootId>, - // Lookup from within a module, to a particular imported (potentially - // aliased) or defined symbol. Basically speaking: if the source code of a - // module contains correctly imported/defined symbols, then this lookup - // will always return the corresponding definition - symbol_lookup: HashMap, -} -impl SymbolTable { - pub(crate) fn new() -> Self { - Self{ module_lookup: HashMap::new(), symbol_lookup: HashMap::new() } + pub(crate) fn as_definition(&self) -> &SymbolDefinition { + match self { + SymbolVariant::Module(v) => unreachable!("called 'as_definition' on {:?}", self), + SymbolVariant::Definition(v) => v, + } } - pub(crate) fn build(&mut self, heap: &Heap, modules: &[LexedModule]) -> Result<(), ParseError> { - // Sanity checks - debug_assert!(self.module_lookup.is_empty()); - debug_assert!(self.symbol_lookup.is_empty()); - if cfg!(debug_assertions) { - for (index, module) in modules.iter().enumerate() { - debug_assert_eq!( - index, module.root_id.index as usize, - "module RootId does not correspond to LexedModule index" - ) - } + pub(crate) fn as_definition_mut(&mut self) -> &mut SymbolDefinition { + match self { + SymbolVariant::Module(v) => unreachable!("called 'as_definition_mut' on {:?}", self), + SymbolVariant::Definition(v) => v, } + } +} - // Preparation: create a lookup from module name to root id. 
This does - // not take aliasing into account. - self.module_lookup.reserve(modules.len()); - for module in modules { - // TODO: Maybe put duplicate module name checking here? - // TODO: @string - self.module_lookup.insert(module.module_name.clone(), module.root_id); +/// TODO: @Cleanup - remove clone everywhere +#[derive(Clone)] +pub struct Symbol { + pub name: StringRef<'static>, + pub variant: SymbolVariant, +} + +impl Symbol { + pub(crate) fn class(&self) -> SymbolClass { + match &self.variant { + SymbolVariant::Module(_) => SymbolClass::Module, + SymbolVariant::Definition(data) => data.class.as_symbol_class(), } + } +} - // Preparation: determine total number of imports we will be inserting - // into the lookup table. We could just iterate over the arena, but then - // we don't know the source file the import belongs to. - let mut lookup_reserve_size = 0; - for module in modules { - let module_root = &heap[module.root_id]; - for import_id in &module_root.imports { - match &heap[*import_id] { - Import::Module(_) => lookup_reserve_size += 1, - Import::Symbols(import) => { - if import.symbols.is_empty() { - // Add all symbols from the other module - match self.module_lookup.get(&import.module) { - Some(target_module_id) => { - lookup_reserve_size += heap[*target_module_id].definitions.len() - }, - None => { - return Err( - ParseError::new_error(&module.source, import.position, "Cannot resolve module") - ); - } - } - } else { - lookup_reserve_size += import.symbols.len(); - } - } - } - } +pub struct SymbolTable { + module_lookup: HashMap, RootId>, + scope_lookup: HashMap, +} - lookup_reserve_size += module_root.definitions.len(); +impl SymbolTable { + pub(crate) fn new() -> Self { + Self{ + module_lookup: HashMap::new(), + scope_lookup: HashMap::new(), } - - self.symbol_lookup.reserve(lookup_reserve_size); - - // First pass: we go through all of the modules and add lookups to - // symbols that are defined within that module. Cross-module imports are - // not yet resolved - for module in modules { - let root = &heap[module.root_id]; - for definition_id in &root.definitions { - let definition = &heap[*definition_id]; - let identifier = definition.identifier(); - if let Err(previous_position) = self.add_definition_symbol( - identifier.position, SymbolKey::from_identifier(module.root_id, &identifier), - module.root_id, *definition_id - ) { - return Err( - ParseError::new_error(&module.source, definition.position(), "Symbol is multiply defined") - .with_postfixed_info(&module.source, previous_position, "Previous definition was here") - ) - } + } + /// Inserts a new module by its name. Upon module naming conflict the + /// previously associated `RootId` will be returned. 
+ pub(crate) fn insert_module(&mut self, module_name: StringRef<'static>, root_id: RootId) -> Result<(), RootId> { + match self.module_lookup.entry(module_name) { + Entry::Occupied(v) => { + Err(*v.get()) + }, + Entry::Vacant(v) => { + v.insert(root_id); + Ok(()) } } + } - // Second pass: now that we can find symbols in modules, we can resolve - // all imports (if they're correct, that is) - for module in modules { - let root = &heap[module.root_id]; - for import_id in &root.imports { - let import = &heap[*import_id]; - match import { - Import::Module(import) => { - // Find the module using its name - let target_root_id = self.resolve_module(&import.module); - if target_root_id.is_none() { - return Err(ParseError::new_error(&module.source, import.position, "Could not resolve module")); - } - let target_root_id = target_root_id.unwrap(); - if target_root_id == module.root_id { - return Err(ParseError::new_error(&module.source, import.position, "Illegal import of self")); - } + /// Retrieves module `RootId` by name + pub(crate) fn get_module_by_name(&mut self, name: &[u8]) -> Option { + let string_ref = StringRef::new(name); + self.module_lookup.get(&string_ref).map(|v| *v) + } - // Add the target module under its alias - if let Err(previous_position) = self.add_namespace_symbol( - import.position, SymbolKey::from_identifier(module.root_id, &import.alias), - target_root_id - ) { - return Err( - ParseError::new_error(&module.source, import.position, "Symbol is multiply defined") - .with_postfixed_info(&module.source, previous_position, "Previous definition was here") - ); - } - }, - Import::Symbols(import) => { - // Find the target module using its name - let target_root_id = self.resolve_module(&import.module); - if target_root_id.is_none() { - return Err(ParseError::new_error(&module.source, import.position, "Could not resolve module of symbol imports")); - } - let target_root_id = target_root_id.unwrap(); - if target_root_id == module.root_id { - return Err(ParseError::new_error(&module.source, import.position, "Illegal import of self")); - } + /// Inserts a new symbol scope. The parent must have been added to the + /// symbol table before. 
+ pub(crate) fn insert_scope(&mut self, parent_scope: Option, new_scope: SymbolScope) { + debug_assert!( + parent_scope.is_none() || self.scope_lookup.contains_key(parent_scope.as_ref().unwrap()), + "inserting scope {:?} but parent {:?} does not exist", new_scope, parent_scope + ); + debug_assert!(!self.scope_lookup.contains_key(&new_scope), "inserting scope {:?}, but it already exists", new_scope); - // Determine which symbols to import - if import.symbols.is_empty() { - // Import of all symbols, not using any aliases - for definition_id in &heap[target_root_id].definitions { - let definition = &heap[*definition_id]; - let identifier = definition.identifier(); - if let Err(previous_position) = self.add_definition_symbol( - import.position, SymbolKey::from_identifier(module.root_id, identifier), - target_root_id, *definition_id - ) { - return Err( - ParseError::new_error( - &module.source, import.position, - &format!("Imported symbol '{}' is already defined", String::from_utf8_lossy(&identifier.value)) - ) - .with_postfixed_info( - &modules[target_root_id.index as usize].source, - definition.position(), - "The imported symbol is defined here" - ) - .with_postfixed_info( - &module.source, previous_position, "And is previously defined here" - ) - ) - } - } - } else { - // Import of specific symbols, optionally using aliases - for symbol in &import.symbols { - // Because we have already added per-module definitions, we can use - // the table to lookup this particular symbol. Note: within a single - // module a namespace-import and a symbol-import may not collide. - // Hence per-module symbols are unique. - // However: if we import a symbol from another module, we don't want - // to "import a module's imported symbol". And so if we do find - // a symbol match, we need to make sure it is a definition from - // within that module by checking `source_root_id == target_root_id` - let key = SymbolKey::from_identifier(target_root_id, &symbol.name); - let target_symbol = self.symbol_lookup.get(&key); - let symbol_definition_id = match target_symbol { - Some(target_symbol) => { - match target_symbol.symbol { - Symbol::Definition((symbol_root_id, symbol_definition_id)) => { - if symbol_root_id == target_root_id { - Some(symbol_definition_id) - } else { - // This is imported within the target module, and not - // defined within the target module - None - } - }, - Symbol::Namespace(_) => { - // We don't import a module's "module import" - None - } - } - }, - None => None - }; - - if symbol_definition_id.is_none() { - return Err( - ParseError::new_error(&module.source, symbol.position, "Could not resolve symbol") - ) - } - let symbol_definition_id = symbol_definition_id.unwrap(); - - if let Err(previous_position) = self.add_definition_symbol( - symbol.position, SymbolKey::from_identifier(module.root_id, &symbol.alias), - target_root_id, symbol_definition_id - ) { - return Err( - ParseError::new_error(&module.source, symbol.position, "Symbol is multiply defined") - .with_postfixed_info(&module.source, previous_position, "Previous definition was here") - ) - } - } - } - } - } - } + if let Some(parent_scope) = parent_scope { + let parent = self.scope_lookup.get_mut(&parent_scope).unwrap(); + parent.child_scopes.push(new_scope); } - fn find_name(heap: &Heap, root_id: RootId) -> String { - let root = &heap[root_id]; - for pragma_id in &root.pragmas { - match &heap[*pragma_id] { - Pragma::Module(module) => { - return String::from_utf8_lossy(&module.value).to_string() - }, - _ => {}, + + let scope = 
ScopedSymbols { + scope: new_scope, + parent_scope, + child_scopes: Vec::with_capacity(RESERVED_SYMBOLS), + symbols: Vec::with_capacity(RESERVED_SYMBOLS) + }; + self.scope_lookup.insert(new_scope, scope); + } + + /// Inserts a symbol into a particular scope. The symbol's name may not + /// exist in the scope or any of its parents. If it does collide then the + /// symbol will be returned, together with the symbol that has the same + /// name. + pub(crate) fn insert_symbol(&mut self, in_scope: SymbolScope, symbol: Symbol) -> Result<(), (Symbol, &Symbol)> { + debug_assert!(self.scope_lookup.contains_key(&in_scope), "inserting symbol {}, but scope {:?} does not exist", symbol.name.as_str(), in_scope); + let mut seek_scope = in_scope; + loop { + let scoped_symbols = self.scope_lookup.get(&seek_scope).unwrap(); + for existing_symbol in scoped_symbols.symbols.iter() { + if symbol.name == existing_symbol.name { + return Err((symbol, existing_symbol)) } } - return String::from("Unknown") + match scoped_symbols.parent_scope { + Some(parent_scope) => { seek_scope = parent_scope; }, + None => { break; } + } } - debug_assert_eq!( - self.symbol_lookup.len(), lookup_reserve_size, - "miscalculated reserved size for symbol lookup table" - ); + // If here, then there is no collision + let scoped_symbols = self.scope_lookup.get_mut(&in_scope).unwrap(); + scoped_symbols.symbols.push(symbol); Ok(()) } - /// Resolves a module by its defined name - pub(crate) fn resolve_module(&self, identifier: &Vec) -> Option { - self.module_lookup.get(identifier).map(|v| *v) - } - - pub(crate) fn resolve_symbol<'t>( - &'t self, root_module_id: RootId, identifier: &[u8] - ) -> Option<&'t SymbolValue> { - let lookup_key = SymbolKey{ module_id: root_module_id, symbol_name: Vec::from(identifier) }; - self.symbol_lookup.get(&lookup_key) - } + /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The + /// returned symbol may both be imported as defined within any of the searched scopes. + pub(crate) fn get_symbol_by_name( + &self, mut in_scope: SymbolScope, name: &[u8] + ) -> Option<&Symbol> { + let string_ref = StringRef::new(name); + loop { + let scope = self.scope_lookup.get(&in_scope); + if scope.is_none() { + return None; + } + let scope = scope.unwrap(); - pub(crate) fn resolve_identifier<'t>( - &'t self, root_module_id: RootId, identifier: &Identifier - ) -> Option<&'t SymbolValue> { - let lookup_key = SymbolKey::from_identifier(root_module_id, identifier); - self.symbol_lookup.get(&lookup_key) + if let Some(symbol) = scope.get_symbol(&string_ref) { + return Some(symbol); + } else { + // Could not find symbol in current scope, seek in the parent scope if it exists + match &scope.parent_scope { + Some(parent_scope) => { in_scope = *parent_scope; }, + None => return None, + } + } + } } - /// Resolves a namespaced symbol. This method will go as far as possible in - /// going to the right symbol. It will halt the search when: - /// 1. Polymorphic arguments are encountered on the identifier. - /// 2. A non-namespace symbol is encountered. - /// 3. 
A part of the identifier couldn't be resolved to anything - /// The returned iterator will always point to the next symbol (even if - /// nothing was found) - pub(crate) fn resolve_namespaced_identifier<'t, 'i>( - &'t self, root_module_id: RootId, identifier: &'i NamespacedIdentifier - ) -> (Option<&'t SymbolValue>, NamespacedIdentifierIter<'i>) { - let mut iter = identifier.iter(); - let mut symbol: Option<&SymbolValue> = None; - let mut within_module_id = root_module_id; - - while let Some((partial, poly_args)) = iter.next() { - // Lookup the symbol within the currently iterated upon module - let lookup_key = SymbolKey{ module_id: within_module_id, symbol_name: Vec::from(partial) }; - let new_symbol = self.symbol_lookup.get(&lookup_key); - - match new_symbol { - None => { - // Can't find anything - symbol = None; - break; - }, - Some(new_symbol) => { - // Found something, but if we already moved to another - // module then we don't want to keep jumping across modules, - // we're only interested in symbols defined within that - // module. - match &new_symbol.symbol { - Symbol::Namespace(new_root_id) => { - if root_module_id != within_module_id { - // This new symbol is imported by a foreign - // module, so this is an error - debug_assert!(symbol.is_some()); - debug_assert!(symbol.unwrap().is_namespace()); - debug_assert!(iter.num_returned() > 1); - symbol = None; - break; - } - within_module_id = *new_root_id; - symbol = Some(new_symbol); - }, - Symbol::Definition((definition_root_id, _)) => { - // Found a definition, but if we already jumped - // modules, then this must be defined within that - // module. - if root_module_id != within_module_id && within_module_id != *definition_root_id { - // This is an imported definition within the module - // So keep the old - debug_assert!(symbol.is_some()); - debug_assert!(symbol.unwrap().is_namespace()); - debug_assert!(iter.num_returned() > 1); - symbol = None; - break; - } - symbol = Some(new_symbol); - break; + /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The + /// returned symbol must be defined within any of the searched scopes and may not be imported. + /// In case such an imported symbol exists then this function still returns `None`. + pub(crate) fn get_symbol_by_name_defined_in_scope( + &self, in_scope: SymbolScope, name: &[u8] + ) -> Option<&Symbol> { + match self.get_symbol_by_name(in_scope, name) { + Some(symbol) => { + match &symbol.variant { + SymbolVariant::Module(_) => { + None // in-scope modules are always imported + }, + SymbolVariant::Definition(variant) => { + if variant.imported_at.is_some() || variant.defined_in_scope == SymbolScope::Global { + // Symbol is imported or lives in the global scope. + // Things in the global scope are defined by the + // compiler. + None + } else { + Some(symbol) } } } - } - - if poly_args.is_some() { - // Polymorphic argument specification should also be a fully - // resolved result. - break; - } - } - - match symbol { - None => (None, iter), - Some(symbol) => (Some(symbol), iter) + }, + None => None, } } - /// Attempts to add a namespace symbol. Returns `Ok` if the symbol was - /// inserted. If the symbol already exists then `Err` will be returned - /// together with the previous definition's source position (in the origin - /// module's source file). - // Note: I would love to return a reference to the value, but Rust is - // preventing me from doing so... That, or I'm not smart enough... 
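// NOTE (editor): the new SymbolTable methods above resolve a name by walking a
// scope's parent chain, and insert_symbol refuses an insertion when the name is
// already visible anywhere on that chain. A minimal standalone sketch of that
// discipline, using simplified stand-in types (usize scope ids, String names)
// rather than the crate's SymbolScope/StringRef:

use std::collections::HashMap;

struct Scope { parent: Option<usize>, symbols: Vec<(String, u32)> }
struct Table { scopes: HashMap<usize, Scope> }

impl Table {
    // Walk from `scope` up through its parents, returning the first match.
    fn lookup(&self, mut scope: usize, name: &str) -> Option<u32> {
        loop {
            let s = self.scopes.get(&scope)?;
            if let Some((_, v)) = s.symbols.iter().find(|(n, _)| n == name) {
                return Some(*v);
            }
            scope = s.parent?; // no parent left: not found
        }
    }

    // Reject the insert if the name shadows anything in this scope or a parent.
    fn insert(&mut self, scope: usize, name: String, v: u32) -> Result<(), u32> {
        if let Some(existing) = self.lookup(scope, &name) {
            return Err(existing);
        }
        self.scopes.get_mut(&scope).unwrap().symbols.push((name, v));
        Ok(())
    }
}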
- fn add_namespace_symbol( - &mut self, origin_position: InputPosition, key: SymbolKey, target_module_id: RootId - ) -> Result<(), InputPosition> { - match self.symbol_lookup.entry(key) { - Entry::Occupied(o) => Err(o.get().position), - Entry::Vacant(v) => { - v.insert(SymbolValue{ - position: origin_position, - symbol: Symbol::Namespace(target_module_id) - }); - Ok(()) - } - } - } + /// Retrieves all symbols that are defined within a particular scope. Imported symbols are + /// ignored. Returns `true` if the scope was found (which may contain 0 defined symbols) and + /// `false` if the scope was not found. + pub(crate) fn get_all_symbols_defined_in_scope(&self, in_scope: SymbolScope, target: &mut Vec) -> bool { + match self.scope_lookup.get(&in_scope) { + Some(scope) => { + for symbol in &scope.symbols { + if let SymbolVariant::Definition(definition) = &symbol.variant { + if definition.imported_at.is_some() { + continue; + } - /// Attempts to add a definition symbol. Returns `Ok` if the symbol was - /// inserted. If the symbol already exists then `Err` will be returned - /// together with the previous definition's source position (in the origin - /// module's source file). - fn add_definition_symbol( - &mut self, origin_position: InputPosition, key: SymbolKey, - target_module_id: RootId, target_definition_id: DefinitionId, - ) -> Result<(), InputPosition> { - match self.symbol_lookup.entry(key) { - Entry::Occupied(o) => Err(o.get().position), - Entry::Vacant(v) => { - v.insert(SymbolValue { - position: origin_position, - symbol: Symbol::Definition((target_module_id, target_definition_id)) - }); - Ok(()) - } + // Defined in scope, so push onto target + target.push(symbol.clone()); + } + } + + true + }, + None => false, } } } \ No newline at end of file diff --git a/src/protocol/parser/symbol_table2.rs b/src/protocol/parser/symbol_table2.rs deleted file mode 100644 index b6b5668e65e700ec1ddb62d4861e57154d3de566..0000000000000000000000000000000000000000 --- a/src/protocol/parser/symbol_table2.rs +++ /dev/null @@ -1,333 +0,0 @@ -/// symbol_table.rs -/// -/// The datastructure used to lookup symbols within particular scopes. Scopes -/// may be module-level or definition level, although imports and definitions -/// within definitions are currently not allowed. -/// -/// TODO: Once the compiler has matured, find out ways to optimize to prevent -/// the repeated HashMap lookup. 
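// NOTE (editor): the TODO in the (now removed) symbol_table2.rs header above —
// avoiding a HashMap probe per scope hop — also applies to the new table. One
// hypothetical direction, not part of this patch, is to keep scopes in a flat
// arena and hand out plain indices, so walking a parent chain is array indexing:

#[derive(Clone, Copy)]
struct ScopeId(u32);
struct ArenaScope { parent: Option<ScopeId> /* plus symbols, etc. */ }
struct ScopeArena { scopes: Vec<ArenaScope> }

impl ScopeArena {
    fn add(&mut self, parent: Option<ScopeId>) -> ScopeId {
        let id = ScopeId(self.scopes.len() as u32);
        self.scopes.push(ArenaScope { parent });
        id
    }
    fn parent(&self, id: ScopeId) -> Option<ScopeId> {
        self.scopes[id.0 as usize].parent // no hashing on the hot lookup path
    }
}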
- -use std::collections::HashMap; -use std::collections::hash_map::Entry; - -use crate::protocol::input_source2::*; -use crate::protocol::ast::*; -use crate::collections::*; - -const RESERVED_SYMBOLS: usize = 32; - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SymbolScope { - Global, - Module(RootId), - Definition(DefinitionId), -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum SymbolClass { - Module, - Struct, - Enum, - Union, - Function, - Component -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum DefinitionClass { - Struct, - Enum, - Union, - Function, - Component, -} - -impl DefinitionClass { - fn as_symbol_class(&self) -> SymbolClass { - match self { - DefinitionClass::Struct => SymbolClass::Struct, - DefinitionClass::Enum => SymbolClass::Enum, - DefinitionClass::Union => SymbolClass::Union, - DefinitionClass::Function => SymbolClass::Function, - DefinitionClass::Component => SymbolClass::Component, - } - } -} - -struct ScopedSymbols { - scope: SymbolScope, - parent_scope: Option, - child_scopes: Vec, - symbols: Vec, -} - -impl ScopedSymbols { - fn get_symbol<'a>(&'a self, name: &StringRef) -> Option<&'a Symbol> { - for symbol in self.symbols.iter() { - if symbol.name == *name { - return Some(symbol); - } - } - - None - } -} - -impl SymbolDefinition { - pub fn symbol_class(&self) -> SymbolClass { - use SymbolDefinition as SD; - use SymbolClass as SC; - - match self { - SD::Module(_) => SC::Module, - SD::Struct(_) => SC::Struct, - SD::Enum(_) => SC::Enum, - SD::Union(_) => SC::Union, - SD::Function(_) => SC::Function, - SD::Component(_) => SC::Component, - } - } -} - -#[derive(Debug)] -pub struct SymbolModule { - pub root_id: RootId, - pub introduced_at: ImportId, -} - -#[derive(Debug, Clone)] -pub struct SymbolDefinition { - // Definition location (not necessarily the place where the symbol - // is introduced, as it may be imported). Builtin symbols will have invalid - // spans and module IDs - pub defined_in_module: RootId, - pub defined_in_scope: SymbolScope, - pub definition_span: InputSpan, // full span of definition - pub identifier_span: InputSpan, // span of just the identifier - // Location where the symbol is introduced in its scope - pub imported_at: Option, - // Definition in the heap, with a utility enum to determine its - // class if the ID is not needed. - pub class: DefinitionClass, - pub definition_id: DefinitionId, -} - -impl SymbolDefinition { - /// Clones the entire data structure, but replaces the `imported_at` field - /// with the supplied `ImportId`. - pub(crate) fn into_imported(mut self, imported_at: ImportId) -> Self { - self.imported_at = Some(imported_at); - self - } -} - -#[derive(Debug)] -pub enum SymbolVariant { - Module(SymbolModule), - Definition(SymbolDefinition), -} - -impl SymbolVariant { - /// Returns the span at which the item was introduced. For an imported - /// item (all modules, and imported types) this returns the span of the - /// import. 
For a defined type this returns the span of the identifier - pub(crate) fn span_of_introduction(&self, heap: &Heap) -> InputSpan { - match self { - SymbolVariant::Module(v) => heap[v.introduced_at].span(), - SymbolVariant::Definition(v) => if let Some(import_id) = v.imported_at { - heap[import_id].span() - } else { - v.identifier_span - }, - } - } - - pub(crate) fn as_module(&self) -> &SymbolModule { - match self { - SymbolVariant::Module(v) => v, - SymbolVariant::Definition(_) => unreachable!("called 'as_module' on {:?}", self), - } - } - - pub(crate) fn as_definition(&self) -> &SymbolDefinition { - match self { - SymbolVariant::Module(v) => unreachable!("called 'as_definition' on {:?}", self), - SymbolVariant::Definition(v) => v, - } - } - - pub(crate) fn as_definition_mut(&mut self) -> &mut SymbolDefinition { - match self { - SymbolVariant::Module(v) => unreachable!("called 'as_definition_mut' on {:?}", self), - SymbolVariant::Definition(v) => v, - } - } -} - -#[derive(Clone)] -pub struct Symbol { - pub name: StringRef<'static>, - pub variant: SymbolVariant, -} - -impl Symbol { - pub(crate) fn class(&self) -> SymbolClass { - match &self.variant { - SymbolVariant::Module(_) => SymbolClass::Module, - SymbolVariant::Definition(data) => data.class.as_symbol_class(), - } - } -} - -pub struct SymbolTable { - module_lookup: HashMap, RootId>, - scope_lookup: HashMap, -} - -impl SymbolTable { - /// Inserts a new module by its name. Upon module naming conflict the - /// previously associated `RootId` will be returned. - pub(crate) fn insert_module(&mut self, module_name: StringRef<'static>, root_id: RootId) -> Result<(), RootId> { - match self.module_lookup.entry(module_name) { - Entry::Occupied(v) => { - Err(*v.get()) - }, - Entry::Vacant(v) => { - v.insert(root_id); - Ok(()) - } - } - } - - /// Retrieves module `RootId` by name - pub(crate) fn get_module_by_name(&mut self, name: &[u8]) -> Option { - let string_ref = StringRef::new(name); - self.module_lookup.get(&string_ref).map(|v| *v) - } - - /// Inserts a new symbol scope. The parent must have been added to the - /// symbol table before. - pub(crate) fn insert_scope(&mut self, parent_scope: Option, new_scope: SymbolScope) { - debug_assert!( - parent_scope.is_none() || self.scope_lookup.contains_key(parent_scope.as_ref().unwrap()), - "inserting scope {:?} but parent {:?} does not exist", new_scope, parent_scope - ); - debug_assert!(!self.scope_lookup.contains_key(&new_scope), "inserting scope {:?}, but it already exists", new_scope); - - if let Some(parent_scope) = parent_scope { - let parent = self.scope_lookup.get_mut(&parent_scope).unwrap(); - parent.child_scopes.push(new_scope); - } - - let scope = ScopedSymbols { - scope: new_scope, - parent_scope, - child_scopes: Vec::with_capacity(RESERVED_SYMBOLS), - symbols: Vec::with_capacity(RESERVED_SYMBOLS) - }; - self.scope_lookup.insert(new_scope, scope); - } - - /// Inserts a symbol into a particular scope. The symbol's name may not - /// exist in the scope or any of its parents. If it does collide then the - /// symbol will be returned, together with the symbol that has the same - /// name. 
- pub(crate) fn insert_symbol(&mut self, in_scope: SymbolScope, symbol: Symbol) -> Result<(), (Symbol, &Symbol)> { - debug_assert!(self.scope_lookup.contains_key(&in_scope), "inserting symbol {}, but scope {:?} does not exist", symbol.name.as_str(), in_scope); - let mut seek_scope = in_scope; - loop { - let scoped_symbols = self.scope_lookup.get(&seek_scope).unwrap(); - for existing_symbol in scoped_symbols.symbols.iter() { - if symbol.name == existing_symbol.name { - return Err((symbol, existing_symbol)) - } - } - - match scoped_symbols.parent_scope { - Some(parent_scope) => { seek_scope = parent_scope; }, - None => { break; } - } - } - - // If here, then there is no collision - let scoped_symbols = self.scope_lookup.get_mut(&in_scope).unwrap(); - scoped_symbols.symbols.push(symbol); - Ok(()) - } - - /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The - /// returned symbol may both be imported as defined within any of the searched scopes. - pub(crate) fn get_symbol_by_name( - &self, mut in_scope: SymbolScope, name: &[u8] - ) -> Option<&Symbol> { - let string_ref = StringRef::new(name); - loop { - let scope = self.scope_lookup.get(&in_scope); - if scope.is_none() { - return None; - } - let scope = scope.unwrap(); - - if let Some(symbol) = scope.get_symbol(&string_ref) { - return Some(symbol); - } else { - // Could not find symbol in current scope, seek in the parent scope if it exists - match &scope.parent_scope { - Some(parent_scope) => { in_scope = *parent_scope; }, - None => return None, - } - } - } - } - - /// Retrieves a symbol by name by searching in a particular scope and that scope's parents. The - /// returned symbol must be defined within any of the searched scopes and may not be imported. - /// In case such an imported symbol exists then this function still returns `None`. - pub(crate) fn get_symbol_by_name_defined_in_scope( - &self, in_scope: SymbolScope, name: &[u8] - ) -> Option<&Symbol> { - match self.get_symbol_by_name(in_scope, name) { - Some(symbol) => { - match &symbol.variant { - SymbolVariant::Module(_) => { - None // in-scope modules are always imported - }, - SymbolVariant::Definition(variant) => { - if variant.imported_at.is_some() || variant.defined_in_scope == SymbolScope::Global { - // Symbol is imported or lives in the global scope. - // Things in the global scope are defined by the - // compiler. - None - } else { - Some(symbol) - } - } - } - }, - None => None, - } - } - - /// Retrieves all symbols that are defined within a particular scope. Imported symbols are - /// ignored. Returns `true` if the scope was found (which may contain 0 defined symbols) and - /// `false` if the scope was not found. 
- pub(crate) fn get_all_symbols_defined_in_scope(&self, in_scope: SymbolScope, target: &mut Vec) -> bool { - match self.scope_lookup.get(&in_scope) { - Some(scope) => { - for symbol in &scope.symbols { - if let SymbolVariant::Definition(definition) = &symbol.variant { - if definition.imported_at.is_some() { - continue; - } - - // Defined in scope, so push onto target - target.push(symbol.clone()); - } - } - - true - }, - None => false, - } - } -} \ No newline at end of file diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index cf4585680eae8a75aa0611abe7da4902207c7c3f..aa47f857aa13f709e8cbe537cb2e7775cf315c5f 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -1,13 +1,13 @@ use crate::collections::{StringRef, ScopedSection}; use crate::protocol::ast::*; -use crate::protocol::input_source2::{ - InputSource2 as InputSource, - InputPosition2 as InputPosition, +use crate::protocol::input_source::{ + InputSource as InputSource, + InputPosition as InputPosition, InputSpan, ParseError, }; use super::tokens::*; -use super::symbol_table2::*; +use super::symbol_table::*; use super::{Module, ModuleCompilationPhase, PassCtx}; // Keywords @@ -70,7 +70,6 @@ pub(crate) const KW_TYPE_INFERRED: &'static [u8] = b"auto"; pub(crate) trait Extendable { type Value; - #[inline] fn push(&mut self, v: Self::Value); } @@ -79,7 +78,7 @@ impl Extendable for Vec { #[inline] fn push(&mut self, v: Self::Value) { - (self as Vec).push(v); + (self as &mut Vec).push(v); } } @@ -88,7 +87,7 @@ impl Extendable for ScopedSection { #[inline] fn push(&mut self, v: Self::Value) { - (self as ScopedSection).push(v); + (self as &mut ScopedSection).push(v); } } @@ -145,6 +144,7 @@ pub(crate) fn consume_comma_separated_until( E: Extendable { let mut had_comma = true; + let mut next; loop { next = iter.next(); if Some(close_delim) == next { @@ -240,7 +240,7 @@ pub(crate) fn maybe_consume_comma_separated_spilled( open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, - consumer_fn: F, target: &mut Vec, item_name_and_article: &'static str, + consumer_fn: F, target: &mut E, item_name_and_article: &'static str, list_name_and_article: &'static str, close_pos: Option<&mut InputPosition> ) -> Result<(), ParseError> where F: Fn(&InputSource, &mut TokenIter) -> Result, @@ -344,7 +344,7 @@ pub(crate) fn consume_character_literal( }, 2 => { if char_text[0] == b'\\' { - let result = parse_escaped_character(char_text[1])?; + let result = parse_escaped_character(source, iter.last_valid_pos(), char_text[1])?; return Ok((result, span)) } }, @@ -379,7 +379,7 @@ pub(crate) fn consume_string_literal( let cur = text[idx]; if cur != b'\\' { if was_escape { - let to_push = parse_escaped_character(cur)?; + let to_push = parse_escaped_character(source, iter.last_valid_pos(), cur)?; buffer.push(to_push); } else { buffer.push(cur as char); @@ -395,7 +395,7 @@ pub(crate) fn consume_string_literal( Ok(span) } -fn parse_escaped_character(v: u8) -> Result { +fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> Result { let result = match v { b'r' => '\r', b'n' => '\n', @@ -404,8 +404,8 @@ fn parse_escaped_character(v: u8) -> Result { b'\\' => '\\', b'\'' => '\'', b'"' => '"', - v => return Err(ParseError::new_error_at_span( - source, span, format!("unexpected escaped character '{}'", v) + v => return Err(ParseError::new_error_at_pos( + source, pos, format!("unexpected escaped character '{}'", v) )), }; Ok(result) diff 
--git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs index a3bbf37c46dbda95f6bbe3c037c16368639b61e3..8e57871340a7d2f36840b621bde96b42435fb388 100644 --- a/src/protocol/parser/tokens.rs +++ b/src/protocol/parser/tokens.rs @@ -1,5 +1,5 @@ -use crate::protocol::input_source2::{ - InputPosition2 as InputPosition, +use crate::protocol::input_source::{ + InputPosition as InputPosition, InputSpan }; @@ -216,11 +216,11 @@ impl TokenBuffer { } pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition { - self.tokens[range.start].pos + self.tokens[range.start as usize].pos } pub(crate) fn end_pos(&self, range: &TokenRange) -> InputPosition { - let last_token = &self.tokens[range.end - 1]; + let last_token = &self.tokens[range.end as usize - 1]; if last_token.kind == TokenKind::SpanEnd { return last_token.pos } else { @@ -293,7 +293,7 @@ impl<'a> TokenIter<'a> { return if token.kind == TokenKind::SpanEnd { token.pos } else { - token.pos.with_offset(token.kind.num_characters()); + token.pos.with_offset(token.kind.num_characters()) }; } diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index cf5bf9b26476f407d1c90b3405a171e54e0f0e90..2ad628bd6c54fa41baf3f9eb84c7f1961ccd38ac 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -2,8 +2,8 @@ use std::fmt::{Formatter, Result as FmtResult}; use std::collections::{HashMap, VecDeque}; use crate::protocol::ast::*; -use crate::protocol::parser::symbol_table2::{SymbolTable, Symbol, SymbolScope}; -use crate::protocol::input_source2::{InputSource2 as InputSource, ParseError}; +use crate::protocol::parser::symbol_table::SymbolScope; +use crate::protocol::input_source::ParseError; use crate::protocol::parser::*; //------------------------------------------------------------------------------ @@ -249,18 +249,6 @@ pub(crate) struct TypeTable { parser_type_iter: VecDeque, } -pub(crate) struct TypeCtx<'a> { - symbols: &'a SymbolTable, - heap: &'a mut Heap, - modules: &'a [Module] -} - -impl<'a> TypeCtx<'a> { - pub(crate) fn new(symbols: &'a SymbolTable, heap: &'a mut Heap, modules: &'a [Module]) -> Self { - Self{ symbols, heap, modules } - } -} - impl TypeTable { /// Construct a new type table without any resolved types. 
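// NOTE (editor): the deleted TypeCtx bundled the mutable heap and the module
// slice into one struct; the new signatures below thread `modules: &[Module]`
// as a separate argument next to the ctx, presumably to ease the borrow-checker
// friction flagged by the "@cleanup Borrow rules hax" note further down. A toy
// illustration with stand-in types of why split parameters help: a reference
// taken from `modules` can stay alive across a call that mutates the heap,
// which a single `&mut ctx` parameter would forbid.

struct Heap(Vec<u32>);
struct Module { name: &'static str }

fn mutate_heap(heap: &mut Heap) { heap.0.push(1); }

fn example(heap: &mut Heap, modules: &[Module]) {
    let m: &Module = &modules[0]; // borrow from modules (assumes non-empty)...
    mutate_heap(heap);            // ...while the heap is mutated independently
    println!("{}", m.name);
}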
pub(crate) fn new() -> Self { @@ -271,14 +259,15 @@ impl TypeTable { } } - pub(crate) fn build_base_types(&mut self, ctx: &mut TypeCtx) -> Result<(), ParseError> { + pub(crate) fn build_base_types(&mut self, modules: &mut [Module], ctx: &mut PassCtx) -> Result<(), ParseError> { // Make sure we're allowed to cast root_id to index into ctx.modules + debug_assert!(modules.iter().all(|m| m.phase >= ModuleCompilationPhase::DefinitionsParsed)); debug_assert!(self.lookup.is_empty()); debug_assert!(self.iter.top().is_none()); debug_assert!(self.parser_type_iter.is_empty()); if cfg!(debug_assertions) { - for (index, module) in ctx.modules.iter().enumerate() { + for (index, module) in modules.iter().enumerate() { debug_assert_eq!(index, module.root_id.index as usize); } } @@ -287,15 +276,18 @@ impl TypeTable { let reserve_size = ctx.heap.definitions.len(); self.lookup.reserve(reserve_size); - for root_idx in 0..ctx.modules.len() { - let last_definition_idx = ctx.heap[ctx.modules[root_idx].root_id].definitions.len(); + for root_idx in 0..modules.len() { + let last_definition_idx = ctx.heap[modules[root_idx].root_id].definitions.len(); for definition_idx in 0..last_definition_idx { - let definition_id = ctx.heap[ctx.modules[root_idx].root_id].definitions[definition_idx]; - self.resolve_base_definition(ctx, definition_id)?; + let definition_id = ctx.heap[modules[root_idx].root_id].definitions[definition_idx]; + self.resolve_base_definition(modules, ctx, definition_id)?; } } debug_assert_eq!(self.lookup.len(), reserve_size, "mismatch in reserved size of type table"); + for module in modules { + module.phase = ModuleCompilationPhase::TypesAddedToTable; + } Ok(()) } @@ -332,11 +324,11 @@ impl TypeTable { /// This function will resolve just the basic definition of the type, it /// will not handle any of the monomorphized instances of the type. 
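// NOTE (editor): resolve_base_definition (below) drives resolution with an
// explicit breadcrumb stack: the top entry is re-examined until all of its
// dependencies are in the lookup table, and an unresolved dependency pushes a
// new breadcrumb instead of recursing. A condensed sketch of that pattern over
// a toy dependency graph (ids and deps are stand-ins; assumes every id has an
// entry, and uses an assert where the real code reports a cycle error):

use std::collections::{HashMap, HashSet};

fn resolve_all(deps: &HashMap<u32, Vec<u32>>) -> Vec<u32> {
    let mut resolved: Vec<u32> = Vec::new();
    let mut done: HashSet<u32> = HashSet::new();
    for &start in deps.keys() {
        let mut stack = vec![start];
        while let Some(&top) = stack.last() {
            if done.contains(&top) { stack.pop(); continue; }
            match deps[&top].iter().find(|&&d| !done.contains(&d)) {
                Some(&next) => {
                    assert!(!stack.contains(&next), "cyclic type definition");
                    stack.push(next); // resolve the dependency first
                }
                None => {
                    done.insert(top); // all dependencies resolved: resolve `top`
                    resolved.push(top);
                    stack.pop();
                }
            }
        }
    }
    resolved
}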
- fn resolve_base_definition<'a>(&'a mut self, ctx: &mut TypeCtx, definition_id: DefinitionId) -> Result<(), ParseError> { + fn resolve_base_definition<'a>(&'a mut self, modules: &[Module], ctx: &mut PassCtx, definition_id: DefinitionId) -> Result<(), ParseError> { // Check if we have already resolved the base definition if self.lookup.contains_key(&definition_id) { return Ok(()); } - let root_id = Self::find_root_id(ctx, definition_id); + let root_id = ctx.heap[definition_id].defined_in(); self.iter.reset(root_id, definition_id); while let Some((root_id, definition_id)) = self.iter.top() { @@ -345,11 +337,11 @@ impl TypeTable { let can_pop_breadcrumb = match definition { // TODO: @cleanup Borrow rules hax - Definition::Enum(_) => self.resolve_base_enum_definition(ctx, root_id, definition_id), - Definition::Union(_) => self.resolve_base_union_definition(ctx, root_id, definition_id), - Definition::Struct(_) => self.resolve_base_struct_definition(ctx, root_id, definition_id), - Definition::Component(_) => self.resolve_base_component_definition(ctx, root_id, definition_id), - Definition::Function(_) => self.resolve_base_function_definition(ctx, root_id, definition_id), + Definition::Enum(_) => self.resolve_base_enum_definition(modules, ctx, root_id, definition_id), + Definition::Union(_) => self.resolve_base_union_definition(modules, ctx, root_id, definition_id), + Definition::Struct(_) => self.resolve_base_struct_definition(modules, ctx, root_id, definition_id), + Definition::Component(_) => self.resolve_base_component_definition(modules, ctx, root_id, definition_id), + Definition::Function(_) => self.resolve_base_function_definition(modules, ctx, root_id, definition_id), }?; // Otherwise: `ingest_resolve_result` has pushed a new breadcrumb @@ -368,7 +360,7 @@ impl TypeTable { /// not instantiate any monomorphized instances of polymorphic enum /// definitions. If a subtype has to be resolved first then this function /// will return `false` after calling `ingest_resolve_result`. - fn resolve_base_enum_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + fn resolve_base_enum_definition(&mut self, modules: &[Module], ctx: &mut PassCtx, root_id: RootId, definition_id: DefinitionId) -> Result { debug_assert!(ctx.heap[definition_id].is_enum()); debug_assert!(!self.lookup.contains_key(&definition_id), "base enum already resolved"); @@ -401,13 +393,13 @@ impl TypeTable { // Ensure enum names and polymorphic args do not conflict self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "enum variant" + modules, root_id, &variants, |variant| &variant.identifier, "enum variant" )?; // Because we're parsing an enum, the programmer cannot put the // polymorphic variables inside the variants. But the polymorphic // variables might still be present as "marker types" - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + self.check_poly_args_collision(modules, ctx, root_id, &definition.poly_vars)?; let poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); self.lookup.insert(definition_id, DefinedType { @@ -430,7 +422,7 @@ impl TypeTable { /// will not instantiate any monomorphized instances of polymorphic union /// definitions. If a subtype has to be resolved first then this function /// will return `false` after calling `ingest_resolve_result`. 
- fn resolve_base_union_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + fn resolve_base_union_definition(&mut self, modules: &[Module], ctx: &mut PassCtx, root_id: RootId, definition_id: DefinitionId) -> Result { debug_assert!(ctx.heap[definition_id].is_union()); debug_assert!(!self.lookup.contains_key(&definition_id), "base union already resolved"); @@ -442,8 +434,8 @@ impl TypeTable { UnionVariantValue::None => {}, UnionVariantValue::Embedded(embedded) => { for parser_type in embedded { - let resolve_result = self.resolve_base_parser_type(ctx, root_id, parser_type)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { + let resolve_result = self.resolve_base_parser_type(modules, ctx, root_id, parser_type)?; + if !self.ingest_resolve_result(modules, ctx, resolve_result)? { return Ok(false) } } @@ -475,9 +467,9 @@ impl TypeTable { // Ensure union names and polymorphic args do not conflict self.check_identifier_collision( - ctx, root_id, &variants, |variant| &variant.identifier, "union variant" + modules, root_id, &variants, |variant| &variant.identifier, "union variant" )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + self.check_poly_args_collision(modules, ctx, root_id, &definition.poly_vars)?; // Construct polymorphic variables and mark the ones that are in use let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); @@ -486,7 +478,7 @@ impl TypeTable { Self::mark_used_polymorphic_variables(&mut poly_vars, parser_type); } } - let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use); // Insert base definition in type table self.lookup.insert(definition_id, DefinedType { @@ -496,7 +488,7 @@ impl TypeTable { variants, tag_representation: Self::enum_tag_type(-1, tag_value), }), - poly_vars: poly_args, + poly_vars, is_polymorph, is_pointerlike: false, // TODO: @cyclic_types monomorphs: Vec::new() @@ -508,7 +500,7 @@ impl TypeTable { /// Resolves the basic struct definition to an entry in the type table. It /// will not instantiate any monomorphized instances of polymorphic struct /// definitions. - fn resolve_base_struct_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + fn resolve_base_struct_definition(&mut self, modules: &[Module], ctx: &mut PassCtx, root_id: RootId, definition_id: DefinitionId) -> Result { debug_assert!(ctx.heap[definition_id].is_struct()); debug_assert!(!self.lookup.contains_key(&definition_id), "base struct already resolved"); @@ -516,8 +508,8 @@ impl TypeTable { // Make sure all fields point to resolvable types for field_definition in &definition.fields { - let resolve_result = self.resolve_base_parser_type(ctx, root_id, &field_definition.parser_type)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { + let resolve_result = self.resolve_base_parser_type(modules, ctx, root_id, &field_definition.parser_type)?; + if !self.ingest_resolve_result(modules, ctx, resolve_result)? 
{ return Ok(false) } } @@ -533,9 +525,9 @@ impl TypeTable { // And make sure no conflicts exist in field names and/or polymorphic args self.check_identifier_collision( - ctx, root_id, &fields, |field| &field.identifier, "struct field" + modules, root_id, &fields, |field| &field.identifier, "struct field" )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + self.check_poly_args_collision(modules, ctx, root_id, &definition.poly_vars)?; // Construct representation of polymorphic arguments let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); @@ -543,7 +535,7 @@ impl TypeTable { Self::mark_used_polymorphic_variables(&mut poly_vars, &field.parser_type); } - let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use); self.lookup.insert(definition_id, DefinedType{ ast_root: root_id, @@ -551,7 +543,7 @@ impl TypeTable { definition: DefinedTypeVariant::Struct(StructType{ fields, }), - poly_vars: poly_args, + poly_vars, is_polymorph, is_pointerlike: false, // TODO: @cyclic monomorphs: Vec::new(), @@ -563,7 +555,7 @@ impl TypeTable { /// Resolves the basic function definition to an entry in the type table. It /// will not instantiate any monomorphized instances of polymorphic function /// definitions. - fn resolve_base_function_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + fn resolve_base_function_definition(&mut self, modules: &[Module], ctx: &mut PassCtx, root_id: RootId, definition_id: DefinitionId) -> Result { debug_assert!(ctx.heap[definition_id].is_function()); debug_assert!(!self.lookup.contains_key(&definition_id), "base function already resolved"); @@ -571,16 +563,16 @@ impl TypeTable { // Check the return type debug_assert_eq!(definition.return_types.len(), 1, "not one return type"); // TODO: @ReturnValues - let resolve_result = self.resolve_base_parser_type(ctx, root_id, &definition.return_types[0])?; - if !self.ingest_resolve_result(ctx, resolve_result)? { + let resolve_result = self.resolve_base_parser_type(modules, ctx, root_id, &definition.return_types[0])?; + if !self.ingest_resolve_result(modules, ctx, resolve_result)? { return Ok(false) } // Check the argument types for param_id in &definition.parameters { let param = &ctx.heap[*param_id]; - let resolve_result = self.resolve_base_parser_type(ctx, root_id, ¶m.parser_type)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { + let resolve_result = self.resolve_base_parser_type(modules, ctx, root_id, ¶m.parser_type)?; + if !self.ingest_resolve_result(modules, ctx, resolve_result)? 
{ return Ok(false) } } @@ -597,9 +589,9 @@ impl TypeTable { // Check conflict of argument and polyarg identifiers self.check_identifier_collision( - ctx, root_id, &arguments, |arg| &arg.identifier, "function argument" + modules, root_id, &arguments, |arg| &arg.identifier, "function argument" )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + self.check_poly_args_collision(modules, ctx, root_id, &definition.poly_vars)?; // Construct polymorphic arguments let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); @@ -607,7 +599,7 @@ impl TypeTable { for argument in &arguments { Self::mark_used_polymorphic_variables(&mut poly_vars, &argument.parser_type); } - let is_polymorph = poly_args.iter().any(|arg| arg.is_in_use); + let is_polymorph = poly_vars.iter().any(|arg| arg.is_in_use); // Construct entry in type table self.lookup.insert(definition_id, DefinedType{ @@ -617,7 +609,7 @@ impl TypeTable { return_types: definition.return_types.clone(), arguments, }), - poly_vars: poly_args, + poly_vars, is_polymorph, is_pointerlike: false, // TODO: @cyclic monomorphs: Vec::new(), @@ -629,7 +621,7 @@ impl TypeTable { /// Resolves the basic component definition to an entry in the type table. /// It will not instantiate any monomorphized instancees of polymorphic /// component definitions. - fn resolve_base_component_definition(&mut self, ctx: &mut TypeCtx, root_id: RootId, definition_id: DefinitionId) -> Result { + fn resolve_base_component_definition(&mut self, modules: &[Module], ctx: &mut PassCtx, root_id: RootId, definition_id: DefinitionId) -> Result { debug_assert!(ctx.heap[definition_id].is_component()); debug_assert!(!self.lookup.contains_key(&definition_id), "base component already resolved"); @@ -639,8 +631,8 @@ impl TypeTable { // Check argument types for param_id in &definition.parameters { let param = &ctx.heap[*param_id]; - let resolve_result = self.resolve_base_parser_type(ctx, root_id, ¶m.parser_type)?; - if !self.ingest_resolve_result(ctx, resolve_result)? { + let resolve_result = self.resolve_base_parser_type(modules, ctx, root_id, ¶m.parser_type)?; + if !self.ingest_resolve_result(modules, ctx, resolve_result)? { return Ok(false) } } @@ -657,14 +649,14 @@ impl TypeTable { // Check conflict of argument and polyarg identifiers self.check_identifier_collision( - ctx, root_id, &arguments, |arg| &arg.identifier, "component argument" + modules, root_id, &arguments, |arg| &arg.identifier, "component argument" )?; - self.check_poly_args_collision(ctx, root_id, &definition.poly_vars)?; + self.check_poly_args_collision(modules, ctx, root_id, &definition.poly_vars)?; // Construct polymorphic arguments let mut poly_vars = Self::create_polymorphic_variables(&definition.poly_vars); for argument in &arguments { - Self::mark_used_polymorphic_variables(&mut poly_vars, &argument.parser_type)?; + Self::mark_used_polymorphic_variables(&mut poly_vars, &argument.parser_type); } let is_polymorph = poly_vars.iter().any(|v| v.is_in_use); @@ -691,7 +683,7 @@ impl TypeTable { /// resolving the current type and exit to the outer resolving loop. In the /// latter case the `result` value was `ResolveResult::Unresolved`, implying /// that the type must be resolved first. 
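// NOTE (editor): when an unresolved dependency is already on the breadcrumb
// stack, the code below reports a cycle and walks the remaining breadcrumbs to
// attach "which depends on this definition" notes. Roughly the shape of that
// error chaining, with plain strings standing in for ParseError spans:

fn report_cycle(breadcrumbs: &[&str], offender: &str) -> Option<String> {
    let start = breadcrumbs.iter().position(|&b| b == offender)?;
    let mut msg = format!("evaluating '{}' results in a cyclic type", offender);
    for (i, b) in breadcrumbs[start + 1..].iter().enumerate() {
        let lead = if i == 0 { "because it depends on" } else { "which depends on" };
        msg.push_str(&format!("\n  {} '{}'", lead, b));
    }
    Some(msg)
}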
- fn ingest_resolve_result(&mut self, ctx: &TypeCtx, result: ResolveResult) -> Result { + fn ingest_resolve_result(&mut self, modules: &[Module], ctx: &PassCtx, result: ResolveResult) -> Result { match result { ResolveResult::Builtin | ResolveResult::PolymoprhicArgument => Ok(true), ResolveResult::Resolved(_, _) => Ok(true), @@ -699,7 +691,7 @@ impl TypeTable { if self.iter.contains(root_id, definition_id) { // Cyclic dependency encountered // TODO: Allow this - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; let mut error = ParseError::new_error_str_at_span( module_source, ctx.heap[definition_id].identifier().span, "Evaluating this type definition results in a cyclic type" @@ -712,7 +704,7 @@ impl TypeTable { "Which depends on this definition" }; - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; error = error.with_info_str_at_span(module_source, ctx.heap[*definition_id].identifier().span, msg); } @@ -736,7 +728,7 @@ impl TypeTable { /// Hence if one checks a particular parser type for being resolved, one may /// get back a result value indicating an embedded type (with a different /// DefinitionId) is unresolved. - fn resolve_base_parser_type(&mut self, ctx: &TypeCtx, root_id: RootId, parser_type: &ParserType) -> Result { + fn resolve_base_parser_type(&mut self, modules: &[Module], ctx: &PassCtx, root_id: RootId, parser_type: &ParserType) -> Result { // Note that as we iterate over the elements of a use ParserTypeVariant as PTV; @@ -769,7 +761,7 @@ impl TypeTable { PTV::Definition(embedded_id, _) => { let definition = &ctx.heap[embedded_id]; if !(definition.is_struct() || definition.is_enum() || definition.is_union()) { - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; return Err(ParseError::new_error_str_at_span( module_source, element.full_span, "expected a datatype (struct, enum or union)" )) @@ -792,14 +784,14 @@ impl TypeTable { /// Go through a list of identifiers and ensure that all identifiers have /// unique names fn check_identifier_collision &Identifier>( - &self, ctx: &TypeCtx, root_id: RootId, items: &[T], getter: F, item_name: &'static str + &self, modules: &[Module], root_id: RootId, items: &[T], getter: F, item_name: &'static str ) -> Result<(), ParseError> { for (item_idx, item) in items.iter().enumerate() { let item_ident = getter(item); for other_item in &items[0..item_idx] { let other_item_ident = getter(other_item); if item_ident == other_item_ident { - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; return Err(ParseError::new_error_at_span( module_source, item_ident.span, format!("This {} is defined more than once", item_name) ).with_info_at_span( @@ -816,18 +808,18 @@ impl TypeTable { /// arguments all have unique names, and the arguments do not conflict with /// any symbols defined at the module scope. 
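// NOTE (editor): both collision checks in this hunk use the same prefix scan —
// each identifier is compared against everything declared before it, so the
// error can point at both occurrences. Standalone sketch returning the colliding
// pair of indices:

fn find_duplicate<T: PartialEq>(items: &[T]) -> Option<(usize, usize)> {
    for (idx, item) in items.iter().enumerate() {
        for (earlier_idx, earlier) in items[..idx].iter().enumerate() {
            if item == earlier {
                return Some((earlier_idx, idx)); // (first occurrence, duplicate)
            }
        }
    }
    None
}

// e.g. find_duplicate(&["T", "U", "T"]) == Some((0, 2))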
fn check_poly_args_collision( - &self, ctx: &TypeCtx, root_id: RootId, poly_args: &[Identifier] + &self, modules: &[Module], ctx: &PassCtx, root_id: RootId, poly_args: &[Identifier] ) -> Result<(), ParseError> { // Make sure polymorphic arguments are unique and none of the // identifiers conflict with any imported scopes for (arg_idx, poly_arg) in poly_args.iter().enumerate() { for other_poly_arg in &poly_args[..arg_idx] { if poly_arg == other_poly_arg { - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; return Err(ParseError::new_error_str_at_span( module_source, poly_arg.span, "This polymorphic argument is defined more than once" - ).with_postfixed_info( + ).with_info_str_at_span( module_source, other_poly_arg.span, "It conflicts with this polymorphic argument" )); @@ -838,7 +830,7 @@ impl TypeTable { // in the current module if let Some(symbol) = ctx.symbols.get_symbol_by_name(SymbolScope::Module(root_id), poly_arg.value.as_bytes()) { // We have a conflict - let module_source = &ctx.modules[root_id.index as usize].source; + let module_source = &modules[root_id.index as usize].source; let introduction_span = symbol.variant.span_of_introduction(ctx.heap); return Err(ParseError::new_error_str_at_span( module_source, poly_arg.span, @@ -869,7 +861,7 @@ impl TypeTable { fn mark_used_polymorphic_variables(poly_vars: &mut Vec, parser_type: &ParserType) { for element in & parser_type.elements { - if let ParserTypeVariant::PolymorphicArgument(_, idx) = element { + if let ParserTypeVariant::PolymorphicArgument(_, idx) = &element.variant { poly_vars[*idx].is_in_use = true; } } @@ -889,20 +881,4 @@ impl TypeTable { PrimitiveType::Long } } - - fn find_root_id(ctx: &TypeCtx, definition_id: DefinitionId) -> RootId { - // TODO: Keep in lookup or something - for module in ctx.modules { - let root_id = module.root_id; - let root = &ctx.heap[root_id]; - for module_definition_id in root.definitions.iter() { - if *module_definition_id == definition_id { - return root_id - } - } - } - - debug_assert!(false, "DefinitionId without corresponding RootId"); - unreachable!(); - } } \ No newline at end of file diff --git a/src/protocol/parser/utils.rs b/src/protocol/parser/utils.rs deleted file mode 100644 index 265daac2f257ca0481ff4b47e3bd354d30fb7654..0000000000000000000000000000000000000000 --- a/src/protocol/parser/utils.rs +++ /dev/null @@ -1,174 +0,0 @@ - use crate::protocol::ast::*; -use crate::protocol::inputsource::*; -use super::symbol_table::*; -use super::type_table::*; - -/// Utility result type. -pub(crate) enum FindTypeResult<'t, 'i> { - // Found the type exactly - Found((&'t DefinedType, NamespacedIdentifierIter<'i>)), - // Could not match symbol - SymbolNotFound{ident_pos: InputPosition}, - // Matched part of the namespaced identifier, but not completely - SymbolPartial{ident_pos: InputPosition, ident_iter: NamespacedIdentifierIter<'i>}, - // Symbol matched, but points to a namespace/module instead of a type - SymbolNamespace{ident_pos: InputPosition, symbol_pos: InputPosition}, -} - -// TODO: @cleanup Find other uses of this pattern -// TODO: Hindsight is 20/20: this belongs in the visitor_linker, not in a -// separate file. -impl<'t, 'i> FindTypeResult<'t, 'i> { - /// Utility function to transform the `FindTypeResult` into a `Result` where - /// `Ok` contains the resolved type, and `Err` contains a `ParseError` which - /// can be readily returned. This is the most common use. 
- pub(crate) fn as_parse_error(self, module_source: &InputSource) -> Result<(&'t DefinedType, NamespacedIdentifierIter<'i>), ParseError> { - match self { - FindTypeResult::Found(defined_type) => Ok(defined_type), - FindTypeResult::SymbolNotFound{ident_pos} => { - Err(ParseError::new_error( - module_source, ident_pos, - "Could not resolve this identifier to a symbol" - )) - }, - FindTypeResult::SymbolPartial{ident_pos, ident_iter} => { - Err(ParseError::new_error( - module_source, ident_pos, - &format!( - "Could not fully resolve this identifier to a symbol, was only able to match '{}'", - &String::from_utf8_lossy(ident_iter.returned_section()) - ) - )) - }, - FindTypeResult::SymbolNamespace{ident_pos, symbol_pos} => { - Err(ParseError::new_error( - module_source, ident_pos, - "This identifier was resolved to a namespace instead of a type" - ).with_postfixed_info( - module_source, symbol_pos, - "This is the referenced namespace" - )) - } - } - } -} - -/// Attempt to find the type pointer to by a (root, identifier) combination. The -/// type must match exactly (no parts in the namespace iterator remaining) and -/// must be a type, not a namespace. -pub(crate) fn find_type_definition<'t, 'i>( - symbols: &SymbolTable, types: &'t TypeTable, - root_id: RootId, identifier: &'i NamespacedIdentifier -) -> FindTypeResult<'t, 'i> { - // Lookup symbol - let (symbol, ident_iter) = symbols.resolve_namespaced_identifier(root_id, identifier); - if symbol.is_none() { - return FindTypeResult::SymbolNotFound{ident_pos: identifier.position}; - } - - // Make sure we resolved it exactly - let symbol = symbol.unwrap(); - if ident_iter.num_remaining() != 0 { - return FindTypeResult::SymbolPartial{ - ident_pos: identifier.position, - ident_iter - }; - } - - match symbol.symbol { - Symbol::Namespace(_) => { - FindTypeResult::SymbolNamespace{ - ident_pos: identifier.position, - symbol_pos: symbol.position - } - }, - Symbol::Definition((_, definition_id)) => { - // If this function is called correctly, then we should always be - // able to match the definition's ID to an entry in the type table. 
- let definition = types.get_base_definition(&definition_id); - debug_assert!(definition.is_some()); - FindTypeResult::Found((definition.unwrap(), ident_iter)) - } - } -} - -pub(crate) enum MatchPolymorphResult<'t> { - Matching, - InferAll(usize), - Mismatch{defined_type: &'t DefinedType, ident_position: InputPosition, num_specified: usize}, - NoneExpected{defined_type: &'t DefinedType, ident_position: InputPosition}, -} - -impl<'t> MatchPolymorphResult<'t> { - pub(crate) fn as_parse_error(self, heap: &Heap, module_source: &InputSource) -> Result { - match self { - MatchPolymorphResult::Matching => Ok(0), - MatchPolymorphResult::InferAll(count) => { - debug_assert!(count > 0); - Ok(count) - }, - MatchPolymorphResult::Mismatch{defined_type, ident_position, num_specified} => { - let type_identifier = heap[defined_type.ast_definition].identifier(); - let args_name = if defined_type.poly_vars.len() == 1 { - "argument" - } else { - "arguments" - }; - - return Err(ParseError::new_error( - module_source, ident_position, - &format!( - "expected {} polymorphic {} (or none, to infer them) for the type {}, but {} were specified", - defined_type.poly_vars.len(), args_name, - &String::from_utf8_lossy(&type_identifier.value), - num_specified - ) - )) - }, - MatchPolymorphResult::NoneExpected{defined_type, ident_position, ..} => { - let type_identifier = heap[defined_type.ast_definition].identifier(); - return Err(ParseError::new_error( - module_source, ident_position, - &format!( - "the type {} is not polymorphic", - &String::from_utf8_lossy(&type_identifier.value) - ) - )) - } - } - } -} - -/// Attempt to match the polymorphic arguments to the number of polymorphic -/// variables in the definition. -pub(crate) fn match_polymorphic_args_to_vars<'t>( - defined_type: &'t DefinedType, poly_args: Option<&[ParserTypeId]>, ident_position: InputPosition -) -> MatchPolymorphResult<'t> { - if defined_type.poly_vars.is_empty() { - // No polymorphic variables on type - if poly_args.is_some() { - return MatchPolymorphResult::NoneExpected{ - defined_type, - ident_position, - }; - } - } else { - // Polymorphic variables on type - let has_specified = poly_args.map_or(false, |a| a.len() != 0); - if !has_specified { - // Implicitly infer all of the polymorphic arguments - return MatchPolymorphResult::InferAll(defined_type.poly_vars.len()); - } - - let num_specified = poly_args.unwrap().len(); - if num_specified != defined_type.poly_vars.len() { - return MatchPolymorphResult::Mismatch{ - defined_type, - ident_position, - num_specified, - }; - } - } - - MatchPolymorphResult::Matching -} \ No newline at end of file diff --git a/src/protocol/parser/visitor.rs b/src/protocol/parser/visitor.rs index 639ed5ca9285af3f63819d4be6dc7fb8310ae7ae..582c2f26ba0fe1f17394975c0f62c9ec7620e078 100644 --- a/src/protocol/parser/visitor.rs +++ b/src/protocol/parser/visitor.rs @@ -1,7 +1,7 @@ use crate::protocol::ast::*; -use crate::protocol::input_source2::ParseError; +use crate::protocol::input_source::ParseError; use crate::protocol::parser::{type_table::*, Module}; -use crate::protocol::symbol_table2::{SymbolTable}; +use crate::protocol::symbol_table::{SymbolTable}; type Unit = (); pub(crate) type VisitorResult = Result; @@ -12,9 +12,6 @@ pub(crate) const STMT_BUFFER_INIT_CAPACITY: usize = 256; /// Globally configured vector capacity for expression buffers in visitor /// implementations pub(crate) const EXPR_BUFFER_INIT_CAPACITY: usize = 256; -/// Globally configured vector capacity for parser type buffers in visitor -/// implementations 
-pub(crate) const TYPE_BUFFER_INIT_CAPACITY: usize = 128; /// General context structure that is used while traversing the AST. pub(crate) struct Ctx<'p> { @@ -75,9 +72,9 @@ pub(crate) trait Visitor2 { } // --- enum variant handling - fn visit_enum_definition(&mut self, _ctx: &mut Ctx, _id: EnumId) -> VisitorResult { Ok(()) } - fn visit_union_definition(&mut self, _ctx: &mut Ctx, _id: UnionId) -> VisitorResult{ Ok(()) } - fn visit_struct_definition(&mut self, _ctx: &mut Ctx, _id: StructId) -> VisitorResult { Ok(()) } + fn visit_enum_definition(&mut self, _ctx: &mut Ctx, _id: EnumDefinitionId) -> VisitorResult { Ok(()) } + fn visit_union_definition(&mut self, _ctx: &mut Ctx, _id: UnionDefinitionId) -> VisitorResult{ Ok(()) } + fn visit_struct_definition(&mut self, _ctx: &mut Ctx, _id: StructDefinitionId) -> VisitorResult { Ok(()) } fn visit_component_definition(&mut self, _ctx: &mut Ctx, _id: ComponentDefinitionId) -> VisitorResult { Ok(()) } fn visit_function_definition(&mut self, _ctx: &mut Ctx, _id: FunctionDefinitionId) -> VisitorResult { Ok(()) } diff --git a/src/protocol/tests/utils.rs b/src/protocol/tests/utils.rs index ece23a2f6126c52d67029237c90612ac3d21ddfb..b3758135f2ce67f881e0fb6912ff9fe436ab87b3 100644 --- a/src/protocol/tests/utils.rs +++ b/src/protocol/tests/utils.rs @@ -1,10 +1,11 @@ use crate::protocol::{ ast::*, - inputsource::*, + input_source::*, parser::{ *, type_table::TypeTable, symbol_table::SymbolTable, + token_parsing::*, }, }; @@ -63,8 +64,8 @@ impl Tester { pub(crate) fn compile(self) -> AstTesterResult { let mut parser = Parser::new(); for (source_idx, source) in self.sources.into_iter().enumerate() { - let mut cursor = std::io::Cursor::new(source); - let input_source = InputSource::new("", &mut cursor) + let source = source.into_bytes(); + let input_source = InputSource::new(String::from(""), source) .expect(&format!("parsing source {}", source_idx + 1)); if let Err(err) = parser.feed(input_source) { @@ -140,7 +141,7 @@ impl AstOkTester { let mut found = false; for definition in self.heap.definitions.iter() { if let Definition::Struct(definition) = definition { - if String::from_utf8_lossy(&definition.identifier.value) != name { + if definition.identifier.value.as_str() != name { continue; } @@ -163,7 +164,7 @@ impl AstOkTester { let mut found = false; for definition in self.heap.definitions.iter() { if let Definition::Enum(definition) = definition { - if String::from_utf8_lossy(&definition.identifier.value) != name { + if definition.identifier.value.as_str() != name { continue; } @@ -186,7 +187,7 @@ impl AstOkTester { let mut found = false; for definition in self.heap.definitions.iter() { if let Definition::Union(definition) = definition { - if String::from_utf8_lossy(&definition.identifier.value) != name { + if definition.identifier.value.as_str() != name { continue; } @@ -209,7 +210,7 @@ impl AstOkTester { let mut found = false; for definition in self.heap.definitions.iter() { if let Definition::Function(definition) = definition { - if String::from_utf8_lossy(&definition.identifier.value) != name { + if definition.identifier.value.as_str() != name { continue; } @@ -287,7 +288,7 @@ impl<'a> StructTester<'a> { pub(crate) fn for_field(self, name: &str, f: F) -> Self { // Find field with specified name for field in &self.def.fields { - if String::from_utf8_lossy(&field.field.value) == name { + if field.field.value.as_str() == name { let tester = StructFieldTester::new(self.ctx, field); f(tester); return self; @@ -304,11 +305,11 @@ impl<'a> StructTester<'a> { fn 
assert_postfix(&self) -> String { let mut v = String::new(); v.push_str("Struct{ name: "); - v.push_str(&String::from_utf8_lossy(&self.def.identifier.value)); + v.push_str(self.def.identifier.value.as_str()); v.push_str(", fields: ["); for (field_idx, field) in self.def.fields.iter().enumerate() { if field_idx != 0 { v.push_str(", "); } - v.push_str(&String::from_utf8_lossy(&field.field.value)); + v.push_str(field.field.value.as_str()); } v.push_str("] }"); v @@ -327,7 +328,7 @@ impl<'a> StructFieldTester<'a> { pub(crate) fn assert_parser_type(self, expected: &str) -> Self { let mut serialized_type = String::new(); - serialize_parser_type(&mut serialized_type, &self.ctx.heap, self.def.parser_type); + serialize_parser_type(&mut serialized_type, &self.ctx.heap, &self.def.parser_type); assert_eq!( expected, &serialized_type, "[{}] Expected type '{}', but got '{}' for {}", @@ -338,11 +339,8 @@ impl<'a> StructFieldTester<'a> { fn assert_postfix(&self) -> String { let mut serialized_type = String::new(); - serialize_parser_type(&mut serialized_type, &self.ctx.heap, self.def.parser_type); - format!( - "StructField{{ name: {}, parser_type: {} }}", - String::from_utf8_lossy(&self.def.field.value), serialized_type - ) + serialize_parser_type(&mut serialized_type, &self.ctx.heap, &self.def.parser_type); + format!("StructField{{ name: {}, parser_type: {} }}", self.def.field.value.as_str(), serialized_type) } } @@ -386,11 +384,11 @@ impl<'a> EnumTester<'a> { pub(crate) fn assert_postfix(&self) -> String { let mut v = String::new(); v.push_str("Enum{ name: "); - v.push_str(&String::from_utf8_lossy(&self.def.identifier.value)); + v.push_str(self.def.identifier.value.as_str()); v.push_str(", variants: ["); for (variant_idx, variant) in self.def.variants.iter().enumerate() { if variant_idx != 0 { v.push_str(", "); } - v.push_str(&String::from_utf8_lossy(&variant.identifier.value)); + v.push_str(variant.identifier.value.as_str()); } v.push_str("] }"); v @@ -437,11 +435,11 @@ impl<'a> UnionTester<'a> { fn assert_postfix(&self) -> String { let mut v = String::new(); v.push_str("Union{ name: "); - v.push_str(&String::from_utf8_lossy(&self.def.identifier.value)); + v.push_str(self.def.identifier.value.as_str()); v.push_str(", variants: ["); for (variant_idx, variant) in self.def.variants.iter().enumerate() { if variant_idx != 0 { v.push_str(", "); } - v.push_str(&String::from_utf8_lossy(&variant.identifier.value)); + v.push_str(variant.identifier.value.as_str()); } v.push_str("] }"); v @@ -461,12 +459,12 @@ impl<'a> FunctionTester<'a> { pub(crate) fn for_variable(self, name: &str, f: F) -> Self { // Find the memory statement in order to find the local let mem_stmt_id = seek_stmt( - self.ctx.heap, self.def.body, + self.ctx.heap, self.def.body.upcast(), &|stmt| { if let Statement::Local(local) = stmt { if let LocalStatement::Memory(memory) = local { let local = &self.ctx.heap[memory.variable]; - if local.identifier.value == name.as_bytes() { + if local.identifier.value.as_str() == name { return true; } } @@ -487,7 +485,7 @@ impl<'a> FunctionTester<'a> { // Find the assignment expression that follows it let assignment_id = seek_expr_in_stmt( - self.ctx.heap, self.def.body, + self.ctx.heap, self.def.body.upcast(), &|expr| { if let Expression::Assignment(assign_expr) = expr { if let Expression::Variable(variable_expr) = &self.ctx.heap[assign_expr.left] { @@ -552,7 +550,7 @@ impl<'a> FunctionTester<'a> { // Use the inner match index to find the expression let expr_id = seek_expr_in_stmt( - &self.ctx.heap, 
self.def.body,
+            &self.ctx.heap, self.def.body.upcast(),
             &|expr| expr.position().offset == inner_match_idx
         );
         assert!(
@@ -573,10 +571,7 @@ impl<'a> FunctionTester<'a> {
     }
 
     fn assert_postfix(&self) -> String {
-        format!(
-            "Function{{ name: {} }}",
-            &String::from_utf8_lossy(&self.def.identifier.value)
-        )
+        format!("Function{{ name: {} }}", self.def.identifier.value.as_str())
     }
 }
 
@@ -596,7 +591,7 @@ impl<'a> VariableTester<'a> {
 
     pub(crate) fn assert_parser_type(self, expected: &str) -> Self {
         let mut serialized = String::new();
-        serialize_parser_type(&mut serialized, self.ctx.heap, self.local.parser_type);
+        serialize_parser_type(&mut serialized, self.ctx.heap, &self.local.parser_type);
 
         assert_eq!(
             expected, &serialized,
@@ -622,11 +617,7 @@ impl<'a> VariableTester<'a> {
     }
 
     fn assert_postfix(&self) -> String {
-        println!("DEBUG: {:?}", self.assignment.concrete_type);
-        format!(
-            "Variable{{ name: {} }}",
-            &String::from_utf8_lossy(&self.local.identifier.value)
-        )
+        format!("Variable{{ name: {} }}", self.local.identifier.value.as_str())
     }
 }
 
@@ -823,57 +814,83 @@ fn has_monomorph<'a>(ctx: TestCtx<'a>, definition_id: DefinitionId, serialized_m
     (has_match, full_buffer)
 }
 
-fn serialize_parser_type(buffer: &mut String, heap: &Heap, id: ParserTypeId) {
+fn serialize_parser_type(buffer: &mut String, heap: &Heap, parser_type: &ParserType) {
     use ParserTypeVariant as PTV;
 
-    let p = &heap[id];
-    match &p.variant {
-        PTV::Message => buffer.push_str("msg"),
-        PTV::Bool => buffer.push_str("bool"),
-        PTV::Byte => buffer.push_str("byte"),
-        PTV::Short => buffer.push_str("short"),
-        PTV::Int => buffer.push_str("int"),
-        PTV::Long => buffer.push_str("long"),
-        PTV::String => buffer.push_str("string"),
-        PTV::IntegerLiteral => buffer.push_str("intlit"),
-        PTV::Inferred => buffer.push_str("auto"),
-        PTV::Array(sub_id) => {
-            serialize_parser_type(buffer, heap, *sub_id);
-            buffer.push_str("[]");
-        },
-        PTV::Input(sub_id) => {
-            buffer.push_str("in<");
-            serialize_parser_type(buffer, heap, *sub_id);
-            buffer.push('>');
-        },
-        PTV::Output(sub_id) => {
-            buffer.push_str("out<");
-            serialize_parser_type(buffer, heap, *sub_id);
-            buffer.push('>');
-        },
-        PTV::Symbolic(symbolic) => {
-            buffer.push_str(&String::from_utf8_lossy(&symbolic.identifier.value));
-            if symbolic.poly_args2.len() > 0 {
+    fn write_bytes(buffer: &mut String, bytes: &[u8]) {
+        let utf8 = String::from_utf8_lossy(bytes);
+        buffer.push_str(&utf8);
+    }
+
+    fn serialize_variant(buffer: &mut String, heap: &Heap, parser_type: &ParserType, mut idx: usize) -> usize {
+        match &parser_type.elements[idx].variant {
+            PTV::Message => write_bytes(buffer, KW_TYPE_MESSAGE),
+            PTV::Bool => write_bytes(buffer, KW_TYPE_BOOL),
+            PTV::UInt8 => write_bytes(buffer, KW_TYPE_UINT8),
+            PTV::UInt16 => write_bytes(buffer, KW_TYPE_UINT16),
+            PTV::UInt32 => write_bytes(buffer, KW_TYPE_UINT32),
+            PTV::UInt64 => write_bytes(buffer, KW_TYPE_UINT64),
+            PTV::SInt8 => write_bytes(buffer, KW_TYPE_SINT8),
+            PTV::SInt16 => write_bytes(buffer, KW_TYPE_SINT16),
+            PTV::SInt32 => write_bytes(buffer, KW_TYPE_SINT32),
+            PTV::SInt64 => write_bytes(buffer, KW_TYPE_SINT64),
+            PTV::Character => write_bytes(buffer, KW_TYPE_CHAR),
+            PTV::String => write_bytes(buffer, KW_TYPE_STRING),
+            PTV::IntegerLiteral => buffer.push_str("int_literal"),
+            PTV::Inferred => write_bytes(buffer, KW_TYPE_INFERRED),
+            PTV::Array => {
+                idx = serialize_variant(buffer, heap, parser_type, idx + 1);
+                buffer.push_str("[]");
+            },
+            PTV::Input => {
+                write_bytes(buffer, KW_TYPE_IN_PORT);
                 buffer.push('<');
-                for (poly_idx, poly_arg) in symbolic.poly_args2.iter().enumerate() {
-                    if poly_idx != 0 { buffer.push(','); }
-                    serialize_parser_type(buffer, heap, *poly_arg);
-                }
+                idx = serialize_variant(buffer, heap, parser_type, idx + 1);
+                buffer.push('>');
+            },
+            PTV::Output => {
+                write_bytes(buffer, KW_TYPE_OUT_PORT);
+                buffer.push('<');
+                idx = serialize_variant(buffer, heap, parser_type, idx + 1);
                 buffer.push('>');
+            },
+            PTV::PolymorphicArgument(definition_id, poly_idx) => {
+                let definition = &heap[*definition_id];
+                let poly_arg = &definition.poly_vars()[*poly_idx];
+                buffer.push_str(poly_arg.value.as_str());
+            },
+            PTV::Definition(definition_id, num_embedded) => {
+                let definition = &heap[*definition_id];
+                buffer.push_str(definition.identifier().value.as_str());
+
+                let num_embedded = *num_embedded;
+                if num_embedded != 0 {
+                    buffer.push('<');
+                    for embedded_idx in 0..num_embedded {
+                        if embedded_idx != 0 {
+                            buffer.push(',');
+                        }
+                        idx = serialize_variant(buffer, heap, parser_type, idx + 1);
+                    }
+                    buffer.push('>');
+                }
            }
        }
+
+        idx
    }
+
+    serialize_variant(buffer, heap, parser_type, 0);
 }
 
 fn serialize_concrete_type(buffer: &mut String, heap: &Heap, def: DefinitionId, concrete: &ConcreteType) {
     // Retrieve polymorphic variables
-    let poly_vars = match &heap[def] {
-        Definition::Function(definition) => &definition.poly_vars,
-        Definition::Component(definition) => &definition.poly_vars,
-        Definition::Struct(definition) => &definition.poly_vars,
-        Definition::Enum(definition) => &definition.poly_vars,
-        Definition::Union(definition) => &definition.poly_vars,
-    };
+    let poly_vars = heap[def].poly_vars();
+
+    fn write_bytes(buffer: &mut String, bytes: &[u8]) {
+        let utf8 = String::from_utf8_lossy(bytes);
+        buffer.push_str(&utf8);
+    }
 
     fn serialize_recursive(
         buffer: &mut String, heap: &Heap, poly_vars: &Vec<Identifier>, concrete: &ConcreteType, mut idx: usize
@@ -883,16 +900,21 @@ fn serialize_concrete_type(buffer: &mut String, heap: &Heap, def: DefinitionId,
         let part = &concrete.parts[idx];
         match part {
             CTP::Marker(poly_idx) => {
-                buffer.push_str(&String::from_utf8_lossy(&poly_vars[*poly_idx].value));
+                buffer.push_str(poly_vars[*poly_idx].value.as_str());
             },
             CTP::Void => buffer.push_str("void"),
-            CTP::Message => buffer.push_str("msg"),
-            CTP::Bool => buffer.push_str("bool"),
-            CTP::Byte => buffer.push_str("byte"),
-            CTP::Short => buffer.push_str("short"),
-            CTP::Int => buffer.push_str("int"),
-            CTP::Long => buffer.push_str("long"),
-            CTP::String => buffer.push_str("string"),
+            CTP::Message => write_bytes(buffer, KW_TYPE_MESSAGE),
+            CTP::Bool => write_bytes(buffer, KW_TYPE_BOOL),
+            CTP::UInt8 => write_bytes(buffer, KW_TYPE_UINT8),
+            CTP::UInt16 => write_bytes(buffer, KW_TYPE_UINT16),
+            CTP::UInt32 => write_bytes(buffer, KW_TYPE_UINT32),
+            CTP::UInt64 => write_bytes(buffer, KW_TYPE_UINT64),
+            CTP::SInt8 => write_bytes(buffer, KW_TYPE_SINT8),
+            CTP::SInt16 => write_bytes(buffer, KW_TYPE_SINT16),
+            CTP::SInt32 => write_bytes(buffer, KW_TYPE_SINT32),
+            CTP::SInt64 => write_bytes(buffer, KW_TYPE_SINT64),
+            CTP::Character => write_bytes(buffer, KW_TYPE_CHAR),
+            CTP::String => write_bytes(buffer, KW_TYPE_STRING),
             CTP::Array => {
                 idx = serialize_recursive(buffer, heap, poly_vars, concrete, idx + 1);
                 buffer.push_str("[]");
@@ -902,18 +924,20 @@ fn serialize_concrete_type(buffer: &mut String, heap: &Heap, def: DefinitionId,
                 buffer.push_str("[..]");
             },
             CTP::Input => {
-                buffer.push_str("in<");
+                write_bytes(buffer, KW_TYPE_IN_PORT);
+                buffer.push('<');
                 idx = serialize_recursive(buffer, heap, poly_vars, concrete, idx + 1);
                 buffer.push('>');
             },
             CTP::Output => {
-                buffer.push_str("out<");
+                write_bytes(buffer, KW_TYPE_OUT_PORT);
+                buffer.push('<');
                 idx = serialize_recursive(buffer, heap, poly_vars, concrete, idx + 1);
                 buffer.push('>');
             },
             CTP::Instance(definition_id, num_sub) => {
                 let definition_name = heap[*definition_id].identifier();
-                buffer.push_str(&String::from_utf8_lossy(&definition_name.value));
+                buffer.push_str(definition_name.value.as_str());
                 if *num_sub != 0 {
                     buffer.push('<');
                     for sub_idx in 0..*num_sub {
@@ -961,15 +985,17 @@ fn seek_stmt<F: Fn(&Statement) -> bool>(heap: &Heap, start: StatementId, f: &F)
         },
         Statement::Labeled(stmt) => seek_stmt(heap, stmt.body, f),
         Statement::If(stmt) => {
-            if let Some(id) = seek_stmt(heap,stmt.true_body, f) {
-                return Some(id);
-            } else if let Some(id) = seek_stmt(heap, stmt.false_body, f) {
+            if let Some(id) = seek_stmt(heap, stmt.true_body.upcast(), f) {
                 return Some(id);
+            } else if let Some(false_body) = stmt.false_body {
+                if let Some(id) = seek_stmt(heap, false_body.upcast(), f) {
+                    return Some(id);
+                }
             }
             None
         },
-        Statement::While(stmt) => seek_stmt(heap, stmt.body, f),
-        Statement::Synchronous(stmt) => seek_stmt(heap, stmt.body, f),
+        Statement::While(stmt) => seek_stmt(heap, stmt.body.upcast(), f),
+        Statement::Synchronous(stmt) => seek_stmt(heap, stmt.body.upcast(), f),
         _ => None
     };
 
@@ -1019,14 +1045,6 @@ fn seek_expr_in_expr<F: Fn(&Expression) -> bool>(heap: &Heap, start: ExpressionI
         Expression::Select(expr) => {
             seek_expr_in_expr(heap, expr.subject, f)
         },
-        Expression::Array(expr) => {
-            for element in &expr.elements {
-                if let Some(id) = seek_expr_in_expr(heap, *element, f) {
-                    return Some(id)
-                }
-            }
-            None
-        },
         Expression::Literal(expr) => {
             if let Literal::Struct(lit) = &expr.value {
                 for field in &lit.fields {
@@ -1034,6 +1052,12 @@ fn seek_expr_in_expr<F: Fn(&Expression) -> bool>(heap: &Heap, start: ExpressionI
                         return Some(id)
                     }
                 }
+            } else if let Literal::Array(elements) = &expr.value {
+                for element in elements {
+                    if let Some(id) = seek_expr_in_expr(heap, *element, f) {
+                        return Some(id)
+                    }
+                }
             }
             None
         },
@@ -1069,16 +1093,20 @@ fn seek_expr_in_stmt<F: Fn(&Expression) -> bool>(heap: &Heap, start: StatementId
         Statement::If(stmt) => {
             None
                 .or_else(|| seek_expr_in_expr(heap, stmt.test, f))
-                .or_else(|| seek_expr_in_stmt(heap, stmt.true_body, f))
-                .or_else(|| seek_expr_in_stmt(heap, stmt.false_body, f))
+                .or_else(|| seek_expr_in_stmt(heap, stmt.true_body.upcast(), f))
+                .or_else(|| if let Some(false_body) = stmt.false_body {
+                    seek_expr_in_stmt(heap, false_body.upcast(), f)
+                } else {
+                    None
+                })
         },
         Statement::While(stmt) => {
             None
                 .or_else(|| seek_expr_in_expr(heap, stmt.test, f))
-                .or_else(|| seek_expr_in_stmt(heap, stmt.body, f))
+                .or_else(|| seek_expr_in_stmt(heap, stmt.body.upcast(), f))
         },
         Statement::Synchronous(stmt) => {
-            seek_expr_in_stmt(heap, stmt.body, f)
+            seek_expr_in_stmt(heap, stmt.body.upcast(), f)
         },
         Statement::Return(stmt) => {
             seek_expr_in_expr(heap, stmt.expression, f)