diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 37c20ecde84a80e926893d9731898a4b442a482a..b7f67792ee752cab0bc43c055739301840ec86ad 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -121,7 +121,7 @@ define_new_ast_id!(IndexingExpressionId, ExpressionId, IndexingExpression, Expre define_new_ast_id!(SlicingExpressionId, ExpressionId, SlicingExpression, Expression::Slicing, expressions); define_new_ast_id!(SelectExpressionId, ExpressionId, SelectExpression, Expression::Select, expressions); define_new_ast_id!(ArrayExpressionId, ExpressionId, ArrayExpression, Expression::Array, expressions); -define_new_ast_id!(ConstantExpressionId, ExpressionId, ConstantExpression, Expression::Constant, expressions); +define_new_ast_id!(LiteralExpressionId, ExpressionId, LiteralExpression, Expression::Literal, expressions); define_new_ast_id!(CallExpressionId, ExpressionId, CallExpression, Expression::Call, expressions); define_new_ast_id!(VariableExpressionId, ExpressionId, VariableExpression, Expression::Variable, expressions); @@ -249,13 +249,13 @@ impl Heap { .alloc_with_id(|id| Expression::Array(f(ArrayExpressionId(id)))), ) } - pub fn alloc_constant_expression( + pub fn alloc_literal_expression( &mut self, - f: impl FnOnce(ConstantExpressionId) -> ConstantExpression, - ) -> ConstantExpressionId { - ConstantExpressionId( + f: impl FnOnce(LiteralExpressionId) -> LiteralExpression, + ) -> LiteralExpressionId { + LiteralExpressionId( self.expressions.alloc_with_id(|id| { - Expression::Constant(f(ConstantExpressionId(id))) + Expression::Literal(f(LiteralExpressionId(id))) }), ) } @@ -937,16 +937,36 @@ impl Display for Type { } } -type CharacterData = Vec; -type IntegerData = i64; +type LiteralCharacter = Vec; +type LiteralInteger = i64; // TODO: @int_literal #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub enum Constant { +pub enum Literal { Null, // message True, False, - Character(CharacterData), - Integer(IntegerData), + Character(LiteralCharacter), + Integer(LiteralInteger), + Struct(LiteralStruct), +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStructField { + // Phase 1: parser + pub(crate) identifier: Identifier, + pub(crate) value: ExpressionId, + // Phase 2: linker + pub(crate) field_idx: usize, // in struct definition +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct LiteralStruct { + // Phase 1: parser + pub(crate) identifier: NamespacedIdentifier, + pub(crate) poly_args: Vec, + pub(crate) fields: Vec, + // Phase 2: linker + pub(crate) definition: Option } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -1956,7 +1976,7 @@ pub enum Expression { Slicing(SlicingExpression), Select(SelectExpression), Array(ArrayExpression), - Constant(ConstantExpression), + Literal(LiteralExpression), Call(CallExpression), Variable(VariableExpression), } @@ -2010,9 +2030,9 @@ impl Expression { _ => panic!("Unable to cast `Expression` to `ArrayExpression`"), } } - pub fn as_constant(&self) -> &ConstantExpression { + pub fn as_constant(&self) -> &LiteralExpression { match self { - Expression::Constant(result) => result, + Expression::Literal(result) => result, _ => panic!("Unable to cast `Expression` to `ConstantExpression`"), } } @@ -2051,7 +2071,7 @@ impl Expression { Expression::Slicing(expr) => &expr.parent, Expression::Select(expr) => &expr.parent, Expression::Array(expr) => &expr.parent, - Expression::Constant(expr) => &expr.parent, + Expression::Literal(expr) => &expr.parent, Expression::Call(expr) => &expr.parent, Expression::Variable(expr) => &expr.parent, } @@ -2075,7 +2095,7 @@ impl Expression { Expression::Slicing(expr) => expr.parent = parent, Expression::Select(expr) => expr.parent = parent, Expression::Array(expr) => expr.parent = parent, - Expression::Constant(expr) => expr.parent = parent, + Expression::Literal(expr) => expr.parent = parent, Expression::Call(expr) => expr.parent = parent, Expression::Variable(expr) => expr.parent = parent, } @@ -2091,7 +2111,7 @@ impl Expression { Expression::Slicing(expr) => &mut expr.concrete_type, Expression::Select(expr) => &mut expr.concrete_type, Expression::Array(expr) => &mut expr.concrete_type, - Expression::Constant(expr) => &mut expr.concrete_type, + Expression::Literal(expr) => &mut expr.concrete_type, Expression::Call(expr) => &mut expr.concrete_type, Expression::Variable(expr) => &mut expr.concrete_type, } @@ -2109,7 +2129,7 @@ impl SyntaxElement for Expression { Expression::Slicing(expr) => expr.position(), Expression::Select(expr) => expr.position(), Expression::Array(expr) => expr.position(), - Expression::Constant(expr) => expr.position(), + Expression::Literal(expr) => expr.position(), Expression::Call(expr) => expr.position(), Expression::Variable(expr) => expr.position(), } @@ -2342,18 +2362,18 @@ impl SyntaxElement for CallExpression { } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct ConstantExpression { - pub this: ConstantExpressionId, +pub struct LiteralExpression { + pub this: LiteralExpressionId, // Phase 1: parser pub position: InputPosition, - pub value: Constant, + pub value: Literal, // Phase 2: linker pub parent: ExpressionParent, // Phase 3: type checking pub concrete_type: ConcreteType, } -impl SyntaxElement for ConstantExpression { +impl SyntaxElement for LiteralExpression { fn position(&self) -> InputPosition { self.position } diff --git a/src/protocol/ast_printer.rs b/src/protocol/ast_printer.rs index 7a89d9478e39e034e812ff8acfc41721b48127cb..209767533d73ef52f9fcf27d1e63f9291f3f4e9f 100644 --- a/src/protocol/ast_printer.rs +++ b/src/protocol/ast_printer.rs @@ -612,17 +612,17 @@ impl ASTWriter { self.kv(indent2).with_s_key("ConcreteType") .with_custom_val(|v| write_concrete_type(v, heap, def_id, &expr.concrete_type)); }, - Expression::Constant(expr) => { + Expression::Literal(expr) => { self.kv(indent).with_id(PREFIX_CONST_EXPR_ID, expr.this.0.index) .with_s_key("ConstantExpr"); let val = self.kv(indent2).with_s_key("Value"); match &expr.value { - Constant::Null => { val.with_s_val("null"); }, - Constant::True => { val.with_s_val("true"); }, - Constant::False => { val.with_s_val("false"); }, - Constant::Character(char) => { val.with_ascii_val(char); }, - Constant::Integer(int) => { val.with_disp_val(int); }, + Literal::Null => { val.with_s_val("null"); }, + Literal::True => { val.with_s_val("true"); }, + Literal::False => { val.with_s_val("false"); }, + Literal::Character(char) => { val.with_ascii_val(char); }, + Literal::Integer(int) => { val.with_disp_val(int); }, } self.kv(indent2).with_s_key("Parent") diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index 92174af650bcdd887a90ac5fda6dfa6802d73603..1f7a71288986d49cbab4f04e145dd9e1c36948ac 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -68,12 +68,12 @@ impl Value { _ => unimplemented!(), } } - fn from_constant(constant: &Constant) -> Value { + fn from_constant(constant: &Literal) -> Value { match constant { - Constant::Null => Value::Message(MessageValue(None)), - Constant::True => Value::Boolean(BooleanValue(true)), - Constant::False => Value::Boolean(BooleanValue(false)), - Constant::Integer(val) => { + Literal::Null => Value::Message(MessageValue(None)), + Literal::True => Value::Boolean(BooleanValue(true)), + Literal::False => Value::Boolean(BooleanValue(false)), + Literal::Integer(val) => { // Convert raw ASCII data to UTF-8 string let val = *val; if val >= BYTE_MIN && val <= BYTE_MAX { @@ -86,7 +86,7 @@ impl Value { Value::Long(LongValue(val)) } } - Constant::Character(_data) => unimplemented!(), + Literal::Character(_data) => unimplemented!(), } } fn set(&mut self, index: &Value, value: &Value) -> Option { @@ -1517,7 +1517,7 @@ impl Store { } todo!() } - Expression::Constant(expr) => Ok(Value::from_constant(&expr.value)), + Expression::Literal(expr) => Ok(Value::from_constant(&expr.value)), Expression::Call(expr) => match &expr.method { Method::Get => { assert_eq!(1, expr.arguments.len()); diff --git a/src/protocol/inputsource.rs b/src/protocol/inputsource.rs index 46e5bc7d0f64af8d69356b6a7baf07bdcadbaec3..dda400c8befa547e4f817aa9f675d863c7835ad9 100644 --- a/src/protocol/inputsource.rs +++ b/src/protocol/inputsource.rs @@ -173,6 +173,8 @@ impl InputPosition { fn eval_error(&self, message: S) -> EvalError { EvalError { position: *self, message: message.to_string(), backtrace: Backtrace::new() } } + + pub(crate) fn col(&self) -> usize { self.column } } impl Default for InputPosition { @@ -202,11 +204,11 @@ pub enum ParseErrorType { #[derive(Debug)] pub struct ParseErrorStatement { - error_type: ParseErrorType, - position: InputPosition, - filename: String, - context: String, - message: String, + pub(crate) error_type: ParseErrorType, + pub(crate) position: InputPosition, + pub(crate) filename: String, + pub(crate) context: String, + pub(crate) message: String, } impl ParseErrorStatement { @@ -277,7 +279,7 @@ impl fmt::Display for ParseErrorStatement { #[derive(Debug)] pub struct ParseError2 { - statements: Vec + pub(crate) statements: Vec } impl fmt::Display for ParseError2 { diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index da622a3eabd0ea9c2b2d5f5e6f27aa02016ed6e9..c8061326ab29c160bf1ab6b475677a3fe7039a96 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -207,6 +207,81 @@ impl Lexer<'_> { } Ok(()) } + /// Generic comma-separated consumer. If opening delimiter is not found then + /// `Ok(None)` will be returned. Otherwise will consume the comma separated + /// values, allowing a trailing comma. If no comma is found and the closing + /// delimiter is not found, then a parse error with `expected_end_msg` is + /// returned. + fn consume_comma_separated( + &mut self, h: &mut Heap, open: u8, close: u8, expected_end_msg: &str, func: F + ) -> Result>, ParseError2> + where F: Fn(&mut Lexer, &mut Heap) -> Result + { + if Some(open) != self.source.next() { + return Ok(None) + } + + self.source.consume(); + self.consume_whitespace(false)?; + let mut elements = Vec::new(); + let mut had_comma = true; + + loop { + if Some(close) == self.source.next() { + self.source.consume(); + break; + } else if !had_comma { + return Err(ParseError2::new_error( + &self.source, self.source.pos(), expected_end_msg + )); + } + + elements.push(func(self, h)?); + self.consume_whitespace(false)?; + + had_comma = self.source.next() == Some(b','); + if had_comma { + self.source.consume(); + self.consume_whitespace(false)?; + } + } + + Ok(Some(elements)) + } + /// Essentially the same as `consume_comma_separated`, but will not allocate + /// memory. Will return `true` and leave the input position at the end of + /// the comma-separated list if well formed. Otherwise returns `false` and + /// leaves the input position at a "random" position. + fn consume_comma_separated_spilled_without_pos_recovery bool>( + &mut self, open: u8, close: u8, func: F + ) -> bool { + if Some(open) != self.source.next() { + return true; + } + + self.source.consume(); + if self.consume_whitespace(false).is_err() { return false }; + let mut had_comma = true; + loop { + if Some(close) == self.source.next() { + self.source.consume(); + return true; + } else if !had_comma { + return false; + } + + if !func(self) { return false; } + if self.consume_whitespace(false).is_err() { return false }; + + had_comma = self.source.next() == Some(b','); + if had_comma { + self.source.consume(); + if self.consume_whitespace(false).is_err() { return false; } + } + } + + true + } fn consume_ident(&mut self) -> Result, ParseError2> { if !self.has_identifier() { return Err(self.error_at_pos("Expected identifier")); @@ -442,7 +517,6 @@ impl Lexer<'_> { } else if self.has_keyword(b"in") { // TODO: @cleanup: not particularly neat to have this special case // where we enforce polyargs in the parser-phase - // TODO: @hack, temporarily allow inferred port values self.consume_keyword(b"in")?; let poly_args = self.consume_polymorphic_args(h, allow_inference)?; let poly_arg = reduce_port_poly_args(h, &pos, poly_args) @@ -456,7 +530,6 @@ impl Lexer<'_> { })?; ParserTypeVariant::Input(poly_arg) } else if self.has_keyword(b"out") { - // TODO: @hack, temporarily allow inferred port values self.consume_keyword(b"out")?; let poly_args = self.consume_polymorphic_args(h, allow_inference)?; let poly_arg = reduce_port_poly_args(h, &pos, poly_args) @@ -572,34 +645,16 @@ impl Lexer<'_> { /// doesn't encounter well-formed polymorphic arguments, then the input /// position is left at a "random" position. fn maybe_consume_poly_args_spilled_without_pos_recovery(&mut self) -> bool { - if let Some(b'<') = self.source.next() { - self.source.consume(); - if self.consume_whitespace(false).is_err() { return false; } - loop { - if !self.maybe_consume_type_spilled_without_pos_recovery() { return false; } - if self.consume_whitespace(false).is_err() { return false; } - let has_comma = self.source.next() == Some(b','); - if has_comma { - self.source.consume(); - if self.consume_whitespace(false).is_err() { return false; } - } - if let Some(b'>') = self.source.next() { - self.source.consume(); - break; - } else if !has_comma { - return false; - } - } - } - - return true; + self.consume_comma_separated_spilled_without_pos_recovery( + b'<', b'>', |lexer| { + lexer.maybe_consume_type_spilled_without_pos_recovery() + }) } - /// Consumes polymorphic arguments and its delimiters if specified. The - /// input position may be at whitespace. If polyargs are present then the - /// whitespace and the args are consumed and the input position will be - /// placed after the polyarg list. If polyargs are not present then the - /// input position will remain unmodified and an empty vector will be + /// Consumes polymorphic arguments and its delimiters if specified. If + /// polyargs are present then the args are consumed and the input position + /// will be placed after the polyarg list. If polyargs are not present then + /// the input position will remain unmodified and an empty vector will be /// returned. /// /// Polymorphic arguments represent the specification of the parametric @@ -607,43 +662,15 @@ impl Lexer<'_> { /// type's polymorphic variables. fn consume_polymorphic_args(&mut self, h: &mut Heap, allow_inference: bool) -> Result, ParseError2> { let backup_pos = self.source.pos(); - self.consume_whitespace(false)?; - if let Some(b'<') = self.source.next() { - // Has polymorphic args, at least one type must be specified - self.source.consume(); - self.consume_whitespace(false)?; - let mut poly_args = Vec::new(); - - loop { - // TODO: @cleanup, remove the no_more_types var - poly_args.push(self.consume_type2(h, allow_inference)?); - self.consume_whitespace(false)?; - - let has_comma = self.source.next() == Some(b','); - if has_comma { - // We might not actually be getting more types when the - // comma is at the end of the line, and we get a closing - // angular bracket on the next line. - self.source.consume(); - self.consume_whitespace(false)?; - } - - if let Some(b'>') = self.source.next() { - self.source.consume(); - break; - } else if !has_comma { - return Err(ParseError2::new_error( - &self.source, self.source.pos(), - "Expected the end of the polymorphic argument list" - )) - } + match self.consume_comma_separated( + h, b'<', b'>', "Expected the end of the polymorphic argument list", + |lexer, heap| lexer.consume_type2(heap, allow_inference) + )? { + Some(poly_args) => Ok(poly_args), + None => { + self.source.seek(backup_pos); + Ok(vec![]) } - - Ok(poly_args) - } else { - // No polymorphic args - self.source.seek(backup_pos); - Ok(vec!()) } } @@ -653,41 +680,17 @@ impl Lexer<'_> { /// delimiters and the polymorphic variables are consumed. Otherwise the /// input position will stay where it is. If no polymorphic variables are /// present then an empty vector will be returned. - fn consume_polymorphic_vars(&mut self) -> Result, ParseError2> { + fn consume_polymorphic_vars(&mut self, h: &mut Heap) -> Result, ParseError2> { let backup_pos = self.source.pos(); - if let Some(b'<') = self.source.next() { - // Found the opening delimiter, we want at least one polyvar - self.source.consume(); - self.consume_whitespace(false)?; - let mut poly_vars = Vec::new(); - - loop { - poly_vars.push(self.consume_identifier()?); - self.consume_whitespace(false)?; - - let has_comma = self.source.next() == Some(b','); - if has_comma { - // We may get another variable - self.source.consume(); - self.consume_whitespace(false)?; - } - - if let Some(b'>') = self.source.next() { - self.source.consume(); - break; - } else if !has_comma { - return Err(ParseError2::new_error( - &self.source, self.source.pos(), - "Expected the end of the polymorphic variable list" - )) - } + match self.consume_comma_separated( + h, b'<', b'>', "Expected the end of the polymorphic variable list", + |lexer, heap| lexer.consume_identifier() + )? { + Some(poly_vars) => Ok(poly_vars), + None => { + self.source.seek(backup_pos); + Ok(vec!()) } - - Ok(poly_vars) - } else { - // No polymorphic args - self.source.seek(backup_pos); - Ok(vec!()) } } @@ -702,27 +705,19 @@ impl Lexer<'_> { h.alloc_parameter(|this| Parameter { this, position, parser_type, identifier }); Ok(id) } - fn consume_parameters( - &mut self, - h: &mut Heap, - params: &mut Vec, - ) -> Result<(), ParseError2> { - self.consume_string(b"(")?; - self.consume_whitespace(false)?; - if !self.has_string(b")") { - while self.source.next().is_some() { - params.push(self.consume_parameter(h)?); - self.consume_whitespace(false)?; - if self.has_string(b")") { - break; - } - self.consume_string(b",")?; - self.consume_whitespace(false)?; + fn consume_parameters(&mut self, h: &mut Heap) -> Result, ParseError2> { + match self.consume_comma_separated( + h, b'(', b')', "Expected the end of the parameter list", + |lexer, heap| lexer.consume_parameter(heap) + )? { + Some(params) => Ok(params), + None => { + Err(ParseError2::new_error( + &self.source, self.source.pos(), + "Expected a parameter list" + )) } } - self.consume_string(b")")?; - - Ok(()) } // ==================== @@ -1337,12 +1332,11 @@ impl Lexer<'_> { if self.has_string(b"{") { return Ok(self.consume_array_expression(h)?.upcast()); } - if self.has_constant() - || self.has_keyword(b"null") - || self.has_keyword(b"true") - || self.has_keyword(b"false") - { - return Ok(self.consume_constant_expression(h)?.upcast()); + if self.has_builtin_literal() { + return Ok(self.consume_builtin_literal_expression(h)?.upcast()); + } + if self.has_struct_literal() { + return Ok(self.consume_struct_literal_expression(h)?.upcast()); } if self.has_call_expression() { return Ok(self.consume_call_expression(h)?.upcast()); @@ -1374,24 +1368,27 @@ impl Lexer<'_> { concrete_type: ConcreteType::default(), })) } - fn has_constant(&self) -> bool { + fn has_builtin_literal(&self) -> bool { is_constant(self.source.next()) + || self.has_keyword(b"null") + || self.has_keyword(b"true") + || self.has_keyword(b"false") } - fn consume_constant_expression( + fn consume_builtin_literal_expression( &mut self, h: &mut Heap, - ) -> Result { + ) -> Result { let position = self.source.pos(); let value; if self.has_keyword(b"null") { self.consume_keyword(b"null")?; - value = Constant::Null; + value = Literal::Null; } else if self.has_keyword(b"true") { self.consume_keyword(b"true")?; - value = Constant::True; + value = Literal::True; } else if self.has_keyword(b"false") { self.consume_keyword(b"false")?; - value = Constant::False; + value = Literal::False; } else if self.source.next() == Some(b'\'') { self.source.consume(); let mut data = Vec::new(); @@ -1405,15 +1402,15 @@ impl Lexer<'_> { return Err(self.error_at_pos("Expected character constant")); } self.source.consume(); - value = Constant::Character(data); + value = Literal::Character(data); } else { if !self.has_integer() { return Err(self.error_at_pos("Expected integer constant")); } - value = Constant::Integer(self.consume_integer()?); + value = Literal::Integer(self.consume_integer()?); } - Ok(h.alloc_constant_expression(|this| ConstantExpression { + Ok(h.alloc_literal_expression(|this| LiteralExpression { this, position, value, @@ -1421,6 +1418,65 @@ impl Lexer<'_> { concrete_type: ConcreteType::default(), })) } + + fn has_struct_literal(&mut self) -> bool { + // A struct literal is written as: + // namespace::StructName{ field: expr } + // We will parse up until the opening brace to see if we're dealing with + // a struct literal. + let backup_pos = self.source.pos(); + let result = self.consume_namespaced_identifier_spilled().is_ok() && + self.consume_whitespace(false).is_ok() && + self.maybe_consume_poly_args_spilled_without_pos_recovery() && + self.consume_whitespace(false).is_ok() && + self.source.next() == Some(b'{'); + + self.source.seek(backup_pos); + return result; + } + + fn consume_struct_literal_expression(&mut self, h: &mut Heap) -> Result { + // Consume identifier and polymorphic arguments + let position = self.source.pos(); + let identifier = self.consume_namespaced_identifier()?; + self.consume_whitespace(false)?; + let poly_args = self.consume_polymorphic_args(h, true)?; + self.consume_whitespace(false)?; + + // Consume fields + let fields = match self.consume_comma_separated( + h, b'{', b'}', "Expected the end of the list of struct fields", + |lexer, heap| { + let identifier = lexer.consume_identifier()?; + lexer.consume_whitespace(false)?; + lexer.consume_string(b":")?; + lexer.consume_whitespace(false)?; + let value = lexer.consume_expression(heap)?; + + Ok(LiteralStructField{ identifier, value, field_idx: 0 }) + } + )? { + Some(fields) => fields, + None => return Err(ParseError2::new_error( + self.source, self.source.pos(), + "A struct literal must be followed by its field values" + )) + }; + + Ok(h.alloc_literal_expression(|this| LiteralExpression{ + this, + position, + value: Literal::Struct(LiteralStruct{ + identifier, + poly_args, + fields, + definition: None, + }), + parent: ExpressionParent::None, + concrete_type: Default::default() + })) + } + fn has_call_expression(&mut self) -> bool { // We need to prevent ambiguity with various operators (because we may // be specifying polymorphic variables) and variables. @@ -1674,10 +1730,14 @@ impl Lexer<'_> { &mut self, h: &mut Heap, ) -> Result { - // Consume channel statement and polymorphic argument if specified + // Consume channel statement and polymorphic argument if specified. + // Needs a tiny bit of special parsing to ensure the right amount of + // whitespace is present. let position = self.source.pos(); self.consume_keyword(b"channel")?; + let expect_whitespace = self.source.next() != Some(b'<'); + self.consume_whitespace(expect_whitespace)?; let poly_args = self.consume_polymorphic_args(h, true)?; let poly_arg_id = match poly_args.len() { 0 => h.alloc_parser_type(|this| ParserType{ @@ -1689,7 +1749,7 @@ impl Lexer<'_> { "port construction using 'channel' accepts up to 1 polymorphic argument" )) }; - self.consume_whitespace(true)?; + self.consume_whitespace(false)?; // Consume the output port let out_parser_type = h.alloc_parser_type(|this| ParserType{ @@ -1703,7 +1763,6 @@ impl Lexer<'_> { self.consume_whitespace(false)?; // Consume the input port - // TODO: Unsure about this, both ports refer to the same ParserType, is this ok? let in_parser_type = h.alloc_parser_type(|this| ParserType{ this, pos: position.clone(), variant: ParserTypeVariant::Input(poly_arg_id) }); @@ -2001,48 +2060,27 @@ impl Lexer<'_> { self.consume_whitespace(true)?; let struct_ident = self.consume_identifier()?; self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars()?; + let poly_vars = self.consume_polymorphic_vars(h)?; self.consume_whitespace(false)?; // Parse struct fields - self.consume_string(b"{")?; - let mut next = self.source.next(); - let mut fields = Vec::new(); - while next.is_some() { - let char = next.unwrap(); - if char == b'}' { - break; + let fields = match self.consume_comma_separated( + h, b'{', b'}', "Expected the end of the list of struct fields", + |lexer, heap| { + let position = lexer.source.pos(); + let parser_type = lexer.consume_type2(heap, false)?; + lexer.consume_whitespace(true)?; + let field = lexer.consume_identifier()?; + + Ok(StructFieldDefinition{ position, field, parser_type }) } - - // Consume field definition - self.consume_whitespace(false)?; - let field_position = self.source.pos(); - let field_parser_type = self.consume_type2(h, false)?; - self.consume_whitespace(true)?; - let field_ident = self.consume_identifier()?; - self.consume_whitespace(false)?; - - fields.push(StructFieldDefinition{ - position: field_position, - field: field_ident, - parser_type: field_parser_type, - }); - - // If we have a comma, then we may or may not have another field - // definition. Otherwise we expect the struct to be fully defined - // and expect a closing brace - next = self.source.next(); - if let Some(b',') = next { - self.source.consume(); - self.consume_whitespace(false)?; - next = self.source.next(); - } else { - break; - } - } - - // End of struct definition, so we expect a closing brace - self.consume_string(b"}")?; + )? { + Some(fields) => fields, + None => return Err(ParseError2::new_error( + self.source, struct_pos, + "An struct definition must be followed by its fields" + )), + }; // Valid struct definition Ok(h.alloc_struct_definition(|this| StructDefinition{ @@ -2060,77 +2098,58 @@ impl Lexer<'_> { self.consume_whitespace(true)?; let enum_ident = self.consume_identifier()?; self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars()?; - self.consume_whitespace(false)?; - - // Parse enum variants - self.consume_string(b"{")?; - let mut next = self.source.next(); - let mut variants = Vec::new(); - while next.is_some() { - let char = next.unwrap(); - if char == b'}' { - break; - } - - // Consume variant identifier - self.consume_whitespace(false)?; - let variant_position = self.source.pos(); - let variant_ident = self.consume_identifier()?; - self.consume_whitespace(false)?; - - // Consume variant (tag) value: may be nothing, in which case it is - // assigned automatically, may be a constant integer, or an embedded - // type as value, resulting in a tagged union - next = self.source.next(); - let variant_value = if let Some(b',') = next { - EnumVariantValue::None - } else if let Some(b'=') = next { - self.source.consume(); - self.consume_whitespace(false)?; - if !self.has_integer() { - return Err(self.error_at_pos("expected integer")); - } - let variant_int = self.consume_integer()?; - self.consume_whitespace(false)?; - EnumVariantValue::Integer(variant_int) - } else if let Some(b'(') = next { - self.source.consume(); - self.consume_whitespace(false)?; - let variant_type = self.consume_type2(h, false)?; - self.consume_whitespace(false)?; - self.consume_string(b")")?; - self.consume_whitespace(false)?; - EnumVariantValue::Type(variant_type) - } else { - return Err(self.error_at_pos("expected ',', '=', or '('")); - }; - - variants.push(EnumVariantDefinition{ - position: variant_position, - identifier: variant_ident, - value: variant_value - }); + let poly_vars = self.consume_polymorphic_vars(h)?; + self.consume_whitespace(false)?; + + let variants = match self.consume_comma_separated( + h, b'{', b'}', "Expected end of enum variant list", + |lexer, heap| { + // Variant identifier + let position = lexer.source.pos(); + let identifier = lexer.consume_identifier()?; + lexer.consume_whitespace(false)?; + + // Optional variant value/type + let next = lexer.source.next(); + let value = match next { + Some(b',') => { + // Do not consume, let `consume_comma_separated` handle + // the next item + EnumVariantValue::None + }, + Some(b'=') => { + // Integer value + lexer.source.consume(); + lexer.consume_whitespace(false)?; + if !lexer.has_integer() { + return Err(lexer.error_at_pos("expected integer")) + } + let value = lexer.consume_integer()?; + EnumVariantValue::Integer(value) + }, + Some(b'(') => { + // Embedded type + lexer.source.consume(); + lexer.consume_whitespace(false)?; + let embedded_type = lexer.consume_type2(heap, false)?; + lexer.consume_whitespace(false)?; + lexer.consume_string(b")")?; + EnumVariantValue::Type(embedded_type) + }, + _ => { + return Err(lexer.error_at_pos("Expected ',', '=', or '('")); + } + }; - // If we have a comma, then we may or may not have another variant, - // otherwise we expect the enum is fully defined - next = self.source.next(); - if let Some(b',') = next { - self.source.consume(); - self.consume_whitespace(false)?; - next = self.source.next(); - } else { - break; + Ok(EnumVariantDefinition{ position, identifier, value }) } - } - - self.consume_string(b"}")?; - - // An enum without variants is somewhat valid, but completely useless - // within the language - if variants.is_empty() { - return Err(ParseError2::new_error(self.source, enum_pos, "enum definition without variants")); - } + )? { + Some(variants) => variants, + None => return Err(ParseError2::new_error( + self.source, enum_pos, + "An enum definition must be followed by its variants" + )), + }; Ok(h.alloc_enum_definition(|this| EnumDefinition{ this, @@ -2155,12 +2174,11 @@ impl Lexer<'_> { self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars()?; + let poly_vars = self.consume_polymorphic_vars(h)?; self.consume_whitespace(false)?; // Consume parameters - let mut parameters = Vec::new(); - self.consume_parameters(h, &mut parameters)?; + let parameters = self.consume_parameters(h)?; self.consume_whitespace(false)?; // Parse body @@ -2182,12 +2200,11 @@ impl Lexer<'_> { self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars()?; + let poly_vars = self.consume_polymorphic_vars(h)?; self.consume_whitespace(false)?; // Consume parameters - let mut parameters = Vec::new(); - self.consume_parameters(h, &mut parameters)?; + let parameters = self.consume_parameters(h)?; self.consume_whitespace(false)?; // Consume body @@ -2209,12 +2226,11 @@ impl Lexer<'_> { self.consume_whitespace(true)?; let identifier = self.consume_identifier()?; self.consume_whitespace(false)?; - let poly_vars = self.consume_polymorphic_vars()?; + let poly_vars = self.consume_polymorphic_vars(h)?; self.consume_whitespace(false)?; // Consume parameters - let mut parameters = Vec::new(); - self.consume_parameters(h, &mut parameters)?; + let parameters = self.consume_parameters(h)?; self.consume_whitespace(false)?; // Consume body @@ -2321,79 +2337,52 @@ impl Lexer<'_> { self.consume_string(b"::")?; self.consume_whitespace(false)?; - if let Some(b'{') = self.source.next() { - // Import specific symbols, optionally with an alias - self.source.consume(); - self.consume_whitespace(false)?; - - let mut symbols = Vec::new(); - let mut next = self.source.next(); - - while next.is_some() { - let char = next.unwrap(); - if char == b'}' { - break; - } - - let symbol_position = self.source.pos(); - let symbol_name = self.consume_ident()?; - self.consume_whitespace(false)?; - if self.has_string(b"as") { - // Symbol has an alias - self.consume_string(b"as")?; - self.consume_whitespace(true)?; - let symbol_alias = self.consume_ident()?; - - symbols.push(AliasedSymbol{ - position: symbol_position, - name: symbol_name, - alias: symbol_alias, - definition_id: None, - }); - } else { - // Symbol does not have an alias - symbols.push(AliasedSymbol{ - position: symbol_position, - name: symbol_name.clone(), - alias: symbol_name, - definition_id: None, - }); - } - - // A comma indicates that we may have another symbol coming - // up (not necessary), but if not present then we expect the - // end of the symbol list - self.consume_whitespace(false)?; - - next = self.source.next(); - if let Some(b',') = next { - self.source.consume(); - self.consume_whitespace(false)?; - next = self.source.next(); - } else { - break; - } - } - - if let Some(b'}') = next { - // We are fine, push the imported symbols - self.source.consume(); - if symbols.is_empty() { - return Err(ParseError2::new_error(self.source, position, "empty symbol import list")); + let next = self.source.next(); + if Some(b'{') == next { + let symbols = match self.consume_comma_separated( + h, b'{', b'}', "Expected end of import list", + |lexer, heap| { + // Symbol name + let position = lexer.source.pos(); + let name = lexer.consume_ident()?; + lexer.consume_whitespace(false)?; + + // Symbol alias + if lexer.has_string(b"as") { + // With alias + lexer.consume_string(b"as")?; + lexer.consume_whitespace(true)?; + let alias = lexer.consume_ident()?; + + Ok(AliasedSymbol{ + position, + name, + alias, + definition_id: None + }) + } else { + // Without alias + Ok(AliasedSymbol{ + position, + name: name.clone(), + alias: name, + definition_id: None + }) + } } + )? { + Some(symbols) => symbols, + None => unreachable!(), // because we checked for opening '{' + }; - h.alloc_import(|this| Import::Symbols(ImportSymbols{ - this, - position, - module_name: value, - module_id: None, - symbols, - })) - } else { - return Err(self.error_at_pos("Expected '}'")); - } - } else if let Some(b'*') = self.source.next() { - // Import all symbols without alias + h.alloc_import(|this| Import::Symbols(ImportSymbols{ + this, + position, + module_name: value, + module_id: None, + symbols, + })) + } else if Some(b'*') == next { self.source.consume(); h.alloc_import(|this| Import::Symbols(ImportSymbols{ this, diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index bd3189dcd0b4ad9299940d53507fbca0327686fd..a78613e1189f2faa09bf748d2b550dd42158dc54 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -4,6 +4,7 @@ mod eval; pub(crate) mod inputsource; // mod lexer; mod parser; +#[cfg(test)] mod tests; // TODO: Remove when not benchmarking pub(crate) mod ast; diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs index 69ae0899ead16d5fd895820d6dfac8716ef10d5f..688745472b8650137f2239dc2f3d98d7bc243e13 100644 --- a/src/protocol/parser/depth_visitor.rs +++ b/src/protocol/parser/depth_visitor.rs @@ -175,7 +175,7 @@ pub(crate) trait Visitor: Sized { fn visit_constant_expression( &mut self, _h: &mut Heap, - _expr: ConstantExpressionId, + _expr: LiteralExpressionId, ) -> VisitorResult { Ok(()) } @@ -434,7 +434,7 @@ fn recursive_expression( Expression::Slicing(expr) => this.visit_slicing_expression(h, expr.this), Expression::Select(expr) => this.visit_select_expression(h, expr.this), Expression::Array(expr) => this.visit_array_expression(h, expr.this), - Expression::Constant(expr) => this.visit_constant_expression(h, expr.this), + Expression::Literal(expr) => this.visit_constant_expression(h, expr.this), Expression::Call(expr) => this.visit_call_expression(h, expr.this), Expression::Variable(expr) => this.visit_variable_expression(h, expr.this), } @@ -1305,7 +1305,7 @@ impl Visitor for AssignableExpressions { fn visit_constant_expression( &mut self, h: &mut Heap, - expr: ConstantExpressionId, + expr: LiteralExpressionId, ) -> VisitorResult { if self.assignable { self.error(h[expr].position) @@ -1424,7 +1424,7 @@ impl Visitor for IndexableExpressions { fn visit_constant_expression( &mut self, h: &mut Heap, - expr: ConstantExpressionId, + expr: LiteralExpressionId, ) -> VisitorResult { if self.indexable { self.error(h[expr].position) @@ -1532,7 +1532,7 @@ impl Visitor for SelectableExpressions { fn visit_constant_expression( &mut self, h: &mut Heap, - expr: ConstantExpressionId, + expr: LiteralExpressionId, ) -> VisitorResult { if self.selectable { self.error(h[expr].position) diff --git a/src/protocol/parser/type_resolver.rs b/src/protocol/parser/type_resolver.rs index 172314208a847db5063049aa1f14bc3d831f0670..e2ebecf68f9aad56031f77eeee377ee8110bc056 100644 --- a/src/protocol/parser/type_resolver.rs +++ b/src/protocol/parser/type_resolver.rs @@ -1197,7 +1197,7 @@ impl Visitor2 for TypeResolvingVisitor { self.progress_array_expr(ctx, id) } - fn visit_constant_expr(&mut self, ctx: &mut Ctx, id: ConstantExpressionId) -> VisitorResult { + fn visit_literal_expr(&mut self, ctx: &mut Ctx, id: LiteralExpressionId) -> VisitorResult { let upcast_id = id.upcast(); self.insert_initial_expr_inference_type(ctx, upcast_id)?; self.progress_constant_expr(ctx, id) @@ -1386,7 +1386,7 @@ impl TypeResolvingVisitor { let id = expr.this; self.progress_array_expr(ctx, id) }, - Expression::Constant(expr) => { + Expression::Literal(expr) => { let id = expr.this; self.progress_constant_expr(ctx, id) }, @@ -1747,14 +1747,14 @@ impl TypeResolvingVisitor { Ok(()) } - fn progress_constant_expr(&mut self, ctx: &mut Ctx, id: ConstantExpressionId) -> Result<(), ParseError2> { + fn progress_constant_expr(&mut self, ctx: &mut Ctx, id: LiteralExpressionId) -> Result<(), ParseError2> { let upcast_id = id.upcast(); let expr = &ctx.heap[id]; let template = match &expr.value { - Constant::Null => &MESSAGE_TEMPLATE[..], - Constant::Integer(_) => &INTEGERLIKE_TEMPLATE[..], - Constant::True | Constant::False => &BOOL_TEMPLATE[..], - Constant::Character(_) => todo!("character literals") + Literal::Null => &MESSAGE_TEMPLATE[..], + Literal::Integer(_) => &INTEGERLIKE_TEMPLATE[..], + Literal::True | Literal::False => &BOOL_TEMPLATE[..], + Literal::Character(_) => todo!("character literals") }; let progress = self.apply_forced_constraint(ctx, upcast_id, template)?; @@ -2434,7 +2434,7 @@ impl TypeResolvingVisitor { let parser_type = &ctx.heap[parser_type_id]; match &parser_type.variant { PTV::Message => { - /// TODO: @types Remove the Message -> Byte hack at some point... + // TODO: @types Remove the Message -> Byte hack at some point... infer_type.push(ITP::Message); infer_type.push(ITP::Byte); }, diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs index f041b25972dff96f84ab901ca3888107883ba7ff..23ed91ac83c39cbde9808e2040070e04337ebf7d 100644 --- a/src/protocol/parser/type_table.rs +++ b/src/protocol/parser/type_table.rs @@ -6,22 +6,22 @@ contains a concrete type definition for each AST type definition. In general terms the type table will go through the following phases during the compilation process: - 1. The base type definitions are resolved after the parser phase has - finished. This implies that the AST is fully constructed, but not yet - annotated. - 2. With the base type definitions resolved, the validation/linker phase will - use the type table (together with the symbol table) to disambiguate - terms (e.g. does an expression refer to a variable, an enum, a constant, - etc.) - 3. During the type checking/inference phase the type table is used to ensure - that the AST contains valid use of types in expressions and statements. - At the same time type inference will find concrete instantiations of - polymorphic types, these will be stored in the type table as monomorphed - instantiations of a generic type. - 4. After type checking and inference (and possibly when constructing byte - code) the type table will construct a type graph and solidify each - non-polymorphic type and monomorphed instantiations of polymorphic types - into concrete types. +1. The base type definitions are resolved after the parser phase has + finished. This implies that the AST is fully constructed, but not yet + annotated. +2. With the base type definitions resolved, the validation/linker phase will + use the type table (together with the symbol table) to disambiguate + terms (e.g. does an expression refer to a variable, an enum, a constant, + etc.) +3. During the type checking/inference phase the type table is used to ensure + that the AST contains valid use of types in expressions and statements. + At the same time type inference will find concrete instantiations of + polymorphic types, these will be stored in the type table as monomorphed + instantiations of a generic type. +4. After type checking and inference (and possibly when constructing byte + code) the type table will construct a type graph and solidify each + non-polymorphic type and monomorphed instantiations of polymorphic types + into concrete types. So a base type is defined by its (optionally polymorphic) representation in the AST. A concrete type has concrete types for each of the polymorphic arguments. A diff --git a/src/protocol/parser/visitor.rs b/src/protocol/parser/visitor.rs index 8332e8e681e073aa53dd5525d7f554c74ccf1d70..6c829516881768ccaec218b1cba7ecc1a20008bf 100644 --- a/src/protocol/parser/visitor.rs +++ b/src/protocol/parser/visitor.rs @@ -207,9 +207,9 @@ pub(crate) trait Visitor2 { let this = expr.this; self.visit_array_expr(ctx, this) } - Expression::Constant(expr) => { + Expression::Literal(expr) => { let this = expr.this; - self.visit_constant_expr(ctx, this) + self.visit_literal_expr(ctx, this) } Expression::Call(expr) => { let this = expr.this; @@ -230,7 +230,7 @@ pub(crate) trait Visitor2 { fn visit_slicing_expr(&mut self, _ctx: &mut Ctx, _id: SlicingExpressionId) -> VisitorResult { Ok(()) } fn visit_select_expr(&mut self, _ctx: &mut Ctx, _id: SelectExpressionId) -> VisitorResult { Ok(()) } fn visit_array_expr(&mut self, _ctx: &mut Ctx, _id: ArrayExpressionId) -> VisitorResult { Ok(()) } - fn visit_constant_expr(&mut self, _ctx: &mut Ctx, _id: ConstantExpressionId) -> VisitorResult { Ok(()) } + fn visit_literal_expr(&mut self, _ctx: &mut Ctx, _id: LiteralExpressionId) -> VisitorResult { Ok(()) } fn visit_call_expr(&mut self, _ctx: &mut Ctx, _id: CallExpressionId) -> VisitorResult { Ok(()) } fn visit_variable_expr(&mut self, _ctx: &mut Ctx, _id: VariableExpressionId) -> VisitorResult { Ok(()) } diff --git a/src/protocol/parser/visitor_linker.rs b/src/protocol/parser/visitor_linker.rs index 14953f6c0ca02beea4640faa69017a37f468f2a5..4f6f8dc30dd2352593ef09ed25c292d3e3bf2349 100644 --- a/src/protocol/parser/visitor_linker.rs +++ b/src/protocol/parser/visitor_linker.rs @@ -695,11 +695,31 @@ impl Visitor2 for ValidityAndLinkerVisitor { Ok(()) } - fn visit_constant_expr(&mut self, ctx: &mut Ctx, id: ConstantExpressionId) -> VisitorResult { + fn visit_literal_expr(&mut self, ctx: &mut Ctx, id: LiteralExpressionId) -> VisitorResult { debug_assert!(!self.performing_breadth_pass); let constant_expr = &mut ctx.heap[id]; - constant_expr.parent = self.expr_parent; + let old_expr_parent = self.expr_parent; + constant_expr.parent = old_expr_parent; + + match &mut constant_expr.value { + Literal::Null | Literal::True | Literal::False | + Literal::Character(_) | Literal::Integer(_) => { + // Just the parent has to be set, done above + }, + Literal::Struct(literal) => { + // Retrieve and set the literals definition + let definition = + // Need to traverse fields expressions in struct + let old_num_exprs = self.expression_buffer.len(); + self.expression_buffer.extend(literal.fields.iter().map(|v| v.value)); + let new_num_exprs = self.expression_buffer.len(); + + self.expression_buffer.truncate(old_num_exprs); + } + } + + self.expr_parent = old_expr_parent; Ok(()) } diff --git a/src/protocol/tests/lexer.rs b/src/protocol/tests/lexer.rs new file mode 100644 index 0000000000000000000000000000000000000000..25d3fba7df135b5e72b6d1325f45b1b316fb7e66 --- /dev/null +++ b/src/protocol/tests/lexer.rs @@ -0,0 +1,109 @@ +/// lexer.rs +/// +/// Simple tests for the lexer. Only tests the lexing of the input source and +/// the resulting AST without relying on the validation/typing pass + +use super::*; + +#[test] +fn test_disallowed_inference() { + Tester::new_single_source_expect_err( + "argument auto inference", + "int func(auto arg) { return 0; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "auto arg"); + }); + + Tester::new_single_source_expect_err( + "return type auto inference", + "auto func(int arg) { return 0; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "auto func"); + }); + + Tester::new_single_source_expect_err( + "implicit polymorph argument auto inference", + "int func(in port) { return port; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "in port"); + }); + + Tester::new_single_source_expect_err( + "explicit polymorph argument auto inference", + "int func(in port) { return port; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "auto> port"); + }); + + Tester::new_single_source_expect_err( + "implicit polymorph return type auto inference", + "in func(in a, in b) { return a; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "in func"); + }); + + Tester::new_single_source_expect_err( + "explicit polymorph return type auto inference", + "in func(in a) { return a; }" + ).error(|e| { e + .assert_msg_has(0, "inference is not allowed") + .assert_occurs_at(0, "auto> func"); + }); +} + +#[test] +fn test_simple_struct_definition() { + Tester::new_single_source_expect_ok( + "empty struct", + "struct Foo{}" + ).for_struct("Foo", |t| { t.assert_num_fields(0); }); + + Tester::new_single_source_expect_ok( + "single field, no comma", + "struct Foo{ int field }" + ).for_struct("Foo", |t| { t + .assert_num_fields(1) + .for_field("field", |f| { + f.assert_parser_type("int"); + }); + }); + + Tester::new_single_source_expect_ok( + "single field, with comma", + "struct Foo{ int field, }" + ).for_struct("Foo", |t| { t + .assert_num_fields(1) + .for_field("field", |f| { f + .assert_parser_type("int"); + }); + }); + + Tester::new_single_source_expect_ok( + "multiple fields, no comma", + "struct Foo{ byte a, short b, int c }" + ).for_struct("Foo", |t| { t + .assert_num_fields(3) + .for_field("a", |f| { f.assert_parser_type("byte"); }) + .for_field("b", |f| { f.assert_parser_type("short"); }) + .for_field("c", |f| { f.assert_parser_type("int"); }); + }); + + Tester::new_single_source_expect_ok( + "multiple fields, with comma", + "struct Foo{ + byte a, + short b, + int c, + }" + ).for_struct("Foo", |t| { t + .assert_num_fields(3) + .for_field("a", |f| { f.assert_parser_type("byte"); }) + .for_field("b", |f| { f.assert_parser_type("short"); }) + .for_field("c", |f| { f.assert_parser_type("int"); }); + }); +} \ No newline at end of file diff --git a/src/protocol/tests/mod.rs b/src/protocol/tests/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..1f35c6430cd44df939055b6cc424a2e989e613d5 --- /dev/null +++ b/src/protocol/tests/mod.rs @@ -0,0 +1,4 @@ +mod utils; +mod lexer; + +pub(crate) use utils::{Tester}; \ No newline at end of file diff --git a/src/protocol/tests/utils.rs b/src/protocol/tests/utils.rs new file mode 100644 index 0000000000000000000000000000000000000000..f3188e3a773a0851341e89df1bd93e3778eea1b2 --- /dev/null +++ b/src/protocol/tests/utils.rs @@ -0,0 +1,371 @@ +use crate::protocol::ast::*; +use crate::protocol::inputsource::*; +use crate::protocol::parser::*; + +//------------------------------------------------------------------------------ +// Interface for parsing and compiling +//------------------------------------------------------------------------------ + +pub(crate) struct Tester { + test_name: String, + sources: Vec +} + +impl Tester { + /// Constructs a new tester, allows adding multiple sources before compiling + pub(crate) fn new(test_name: S) -> Self { + Self{ + test_name: test_name.to_string(), + sources: Vec::new() + } + } + + /// Utility for quick tests that use a single source file and expect the + /// compilation to succeed. + pub(crate) fn new_single_source_expect_ok(test_name: T, source: S) -> AstOkTester { + Self::new(test_name) + .with_source(source) + .compile() + .expect_ok() + } + + /// Utility for quick tests that use a single source file and expect the + /// compilation to fail. + pub(crate) fn new_single_source_expect_err(test_name: T, source: S) -> AstErrTester { + Self::new(test_name) + .with_source(source) + .compile() + .expect_err() + } + + pub(crate) fn with_source(mut self, source: S) -> Self { + self.sources.push(source.to_string()); + self + } + + pub(crate) fn compile(self) -> AstTesterResult { + let mut parser = Parser::new(); + for (source_idx, source) in self.sources.into_iter().enumerate() { + let mut cursor = std::io::Cursor::new(source); + let input_source = InputSource::new("", &mut cursor) + .expect(&format!("parsing source {}", source_idx + 1)); + + if let Err(err) = parser.feed(input_source) { + return AstTesterResult::Err(AstErrTester::new(self.test_name, err)) + } + } + + parser.compile(); + if let Err(err) = parser.parse() { + return AstTesterResult::Err(AstErrTester::new(self.test_name, err)) + } + + AstTesterResult::Ok(AstOkTester::new(self.test_name, parser)) + } +} + +pub(crate) enum AstTesterResult { + Ok(AstOkTester), + Err(AstErrTester) +} + +impl AstTesterResult { + pub(crate) fn expect_ok(self) -> AstOkTester { + match self { + AstTesterResult::Ok(v) => v, + AstTesterResult::Err(err) => { + let wrapped = ErrorTester{ test_name: &err.test_name, error: &err.error }; + assert!( + false, + "[{}] Expected compilation to succeed, but it failed with {}", + err.test_name, wrapped.assert_postfix() + ); + unreachable!(); + } + } + } + + pub(crate) fn expect_err(self) -> AstErrTester { + match self { + AstTesterResult::Ok(ok) => { + assert!(false, "[{}] Expected compilation to fail, but it succeeded", ok.test_name); + unreachable!(); + }, + AstTesterResult::Err(err) => err, + } + } +} + +//------------------------------------------------------------------------------ +// Interface for successful compilation +//------------------------------------------------------------------------------ + +pub(crate) struct AstOkTester { + test_name: String, + modules: Vec, + heap: Heap, +} + +impl AstOkTester { + fn new(test_name: String, parser: Parser) -> Self { + Self { + test_name, + modules: parser.modules, + heap: parser.heap + } + } + + pub(crate) fn for_struct(self, name: &str, f: F) -> Self { + let mut found = false; + for definition in self.heap.definitions.iter() { + if let Definition::Struct(definition) = definition { + if String::from_utf8_lossy(&definition.identifier.value) != name { + continue; + } + + // Found struct with the same name + let tester = StructTester::new(&self.test_name, definition, &self.heap); + f(tester); + found = true; + break + } + } + + if found { return self } + + assert!( + false, "[{}] Failed to find definition for struct '{}'", + self.test_name, name + ); + unreachable!() + } +} + +//------------------------------------------------------------------------------ +// Utilities for successful compilation +//------------------------------------------------------------------------------ + +pub(crate) struct StructTester<'a> { + test_name: &'a str, + def: &'a StructDefinition, + heap: &'a Heap, +} + +impl<'a> StructTester<'a> { + fn new(test_name: &'a str, def: &'a StructDefinition, heap: &'a Heap) -> Self { + Self{ test_name, def, heap } + } + + pub(crate) fn assert_num_fields(self, num: usize) -> Self { + debug_assert_eq!( + num, self.def.fields.len(), + "[{}] Expected {} struct fields, but found {} for {}", + self.test_name, num, self.def.fields.len(), self.assert_postfix() + ); + self + } + + pub(crate) fn for_field(self, name: &str, f: F) -> Self { + // Find field with specified name + for field in &self.def.fields { + if String::from_utf8_lossy(&field.field.value) == name { + let tester = StructFieldTester::new(self.test_name, field, self.heap); + f(tester); + return self; + } + } + + assert!( + false, "[{}] Could not find struct field '{}' for {}", + self.test_name, name, self.assert_postfix() + ); + unreachable!(); + } + + fn assert_postfix(&self) -> String { + let mut v = String::new(); + v.push_str("Struct{ name: "); + v.push_str(&String::from_utf8_lossy(&self.def.identifier.value)); + v.push_str(", fields: ["); + for (field_idx, field) in self.def.fields.iter().enumerate() { + if field_idx != 0 { v.push_str(", "); } + v.push_str(&String::from_utf8_lossy(&field.field.value)); + } + v.push_str("] }"); + v + } +} + +pub(crate) struct StructFieldTester<'a> { + test_name: &'a str, + def: &'a StructFieldDefinition, + heap: &'a Heap, +} + +impl<'a> StructFieldTester<'a> { + fn new(test_name: &'a str, def: &'a StructFieldDefinition, heap: &'a Heap) -> Self { + Self{ test_name, def, heap } + } + + pub(crate) fn assert_parser_type(self, expected: &str) -> Self { + let mut serialized_type = String::new(); + serialize_parser_type(&mut serialized_type, &self.heap, self.def.parser_type); + debug_assert_eq!( + expected, &serialized_type, + "[{}] Expected type '{}', but got '{}' for {}", + self.test_name, expected, &serialized_type, self.assert_postfix() + ); + self + } + + fn assert_postfix(&self) -> String { + let mut serialized_type = String::new(); + serialize_parser_type(&mut serialized_type, &self.heap, self.def.parser_type); + format!( + "StructField{{ name: {}, parser_type: {} }}", + String::from_utf8_lossy(&self.def.field.value), serialized_type + ) + } +} + +//------------------------------------------------------------------------------ +// Interface for failed compilation +//------------------------------------------------------------------------------ + +pub(crate) struct AstErrTester { + test_name: String, + error: ParseError2, +} + +impl AstErrTester { + fn new(test_name: String, error: ParseError2) -> Self { + Self{ test_name, error } + } + + pub(crate) fn error(&self, f: F) { + // Maybe multiple errors will be supported in the future + let tester = ErrorTester{ test_name: &self.test_name, error: &self.error }; + f(tester) + } +} + +//------------------------------------------------------------------------------ +// Utilities for failed compilation +//------------------------------------------------------------------------------ + +pub(crate) struct ErrorTester<'a> { + test_name: &'a str, + error: &'a ParseError2, +} + +impl<'a> ErrorTester<'a> { + pub(crate) fn assert_num(self, num: usize) -> Self { + assert_eq!( + num, self.error.statements.len(), + "[{}] expected error to consist of '{}' parts, but encountered '{}' for {}", + self.test_name, num, self.error.statements.len(), self.assert_postfix() + ); + + self + } + + pub(crate) fn assert_ctx_has(self, idx: usize, msg: &str) -> Self { + assert!( + self.error.statements[idx].context.contains(msg), + "[{}] expected error statement {}'s context to contain '{}' for {}", + self.test_name, idx, msg, self.assert_postfix() + ); + + self + } + + pub(crate) fn assert_msg_has(self, idx: usize, msg: &str) -> Self { + assert!( + self.error.statements[idx].message.contains(msg), + "[{}] expected error statement {}'s message to contain '{}' for {}", + self.test_name, idx, msg, self.assert_postfix() + ); + + self + } + + /// Seeks the index of the pattern in the context message, then checks if + /// the input position corresponds to that index. + pub (crate) fn assert_occurs_at(self, idx: usize, pattern: &str) -> Self { + let pos = self.error.statements[idx].context.find(pattern); + assert!( + pos.is_some(), + "[{}] incorrect occurs_at: '{}' could not be found in the context for {}", + self.test_name, pattern, self.assert_postfix() + ); + let pos = pos.unwrap(); + let col = self.error.statements[idx].position.col(); + assert_eq!( + pos + 1, col, + "[{}] Expected error to occur at column {}, but found it at {} for {}", + self.test_name, pos + 1, col, self.assert_postfix() + ); + + self + } + + fn assert_postfix(&self) -> String { + let mut v = String::new(); + v.push_str("error: ["); + for (idx, stmt) in self.error.statements.iter().enumerate() { + if idx != 0 { + v.push_str(", "); + } + + v.push_str(&format!("{{ context: {}, message: {} }}", &stmt.context, stmt.message)); + } + v.push(']'); + v + } +} + +//------------------------------------------------------------------------------ +// Generic utilities +//------------------------------------------------------------------------------ + +fn serialize_parser_type(buffer: &mut String, heap: &Heap, id: ParserTypeId) { + use ParserTypeVariant as PTV; + + let p = &heap[id]; + match &p.variant { + PTV::Message => buffer.push_str("msg"), + PTV::Bool => buffer.push_str("bool"), + PTV::Byte => buffer.push_str("byte"), + PTV::Short => buffer.push_str("short"), + PTV::Int => buffer.push_str("int"), + PTV::Long => buffer.push_str("long"), + PTV::String => buffer.push_str("string"), + PTV::IntegerLiteral => buffer.push_str("intlit"), + PTV::Inferred => buffer.push_str("auto"), + PTV::Array(sub_id) => { + serialize_parser_type(buffer, heap, *sub_id); + buffer.push_str("[]"); + }, + PTV::Input(sub_id) => { + buffer.push_str("in<"); + serialize_parser_type(buffer, heap, *sub_id); + buffer.push('>'); + }, + PTV::Output(sub_id) => { + buffer.push_str("out<"); + serialize_parser_type(buffer, heap, *sub_id); + buffer.push('>'); + }, + PTV::Symbolic(symbolic) => { + buffer.push_str(&String::from_utf8_lossy(&symbolic.identifier.value)); + if symbolic.poly_args.len() > 0 { + buffer.push('<'); + for (poly_idx, poly_arg) in symbolic.poly_args.iter().enumerate() { + if poly_idx != 0 { buffer.push(','); } + serialize_parser_type(buffer, heap, *poly_arg); + } + buffer.push('>'); + } + } + } +} \ No newline at end of file