From 814be01095d9c9a2b5e0dd1a07a27919f3799a23 2021-01-07 19:12:05 From: mh Date: 2021-01-07 19:12:05 Subject: [PATCH] implement module/version pragma, WIP on visitor rewrite --- diff --git a/src/lib.rs b/src/lib.rs index 082d77bfa9d40d481101efa02e087b80434f5322..8674f5eedb3dcf874836957d45bde0a956e0503a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,5 +9,10 @@ pub use common::{ConnectorId, EndpointPolarity, Payload, Polarity, PortId}; pub use protocol::{ProtocolDescription, TRIVIAL_PD}; pub use runtime::{error, Connector, DummyLogger, FileLogger, VecLogger}; +// TODO: Remove when not benchmarking +pub use protocol::inputsource::InputSource; +pub use protocol::ast::Heap; +pub use protocol::lexer::Lexer; + #[cfg(feature = "ffi")] pub mod ffi; diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 6df72ed25770718b6ba4d0726a6d91f057bad956..e28067b5849b6d42322ef7524386c96b9a5aff3e 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -1246,14 +1246,36 @@ impl SyntaxElement for Root { } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct Pragma { +pub enum Pragma { + Version(PragmaVersion), + Module(PragmaModule) +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct PragmaVersion { + pub this: PragmaId, + // Phase 1: parser + pub position: InputPosition, + pub version: u64, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct PragmaModule { + pub this: PragmaId, + // Phase 1: parser + pub position: InputPosition, + pub value: Vec<u8>, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct PragmaOld { pub this: PragmaId, // Phase 1: parser pub position: InputPosition, pub value: Vec<u8>, } -impl SyntaxElement for Pragma { +impl SyntaxElement for PragmaOld { fn position(&self) -> InputPosition { self.position } @@ -1481,7 +1503,7 @@ impl SyntaxElement for TypeAnnotation { } type CharacterData = Vec<u8>; -type IntegerData = Vec<u8>; +type IntegerData = i64; #[derive(Debug, 
Clone, serde::Serialize, serde::Deserialize)] pub enum Constant { diff --git a/src/protocol/eval.rs b/src/protocol/eval.rs index d6fcc3dd9b6176c43032e1d93c13bb99b0976436..1067b0e63a5e237820d4d75b49ba84ad0ad92b70 100644 --- a/src/protocol/eval.rs +++ b/src/protocol/eval.rs @@ -68,10 +68,9 @@ impl Value { Constant::Null => Value::Message(MessageValue(None)), Constant::True => Value::Boolean(BooleanValue(true)), Constant::False => Value::Boolean(BooleanValue(false)), - Constant::Integer(data) => { + Constant::Integer(val) => { // Convert raw ASCII data to UTF-8 string - let raw = String::from_utf8_lossy(data); - let val = raw.parse::<i64>().unwrap(); + let val = *val; if val >= BYTE_MIN && val <= BYTE_MAX { Value::Byte(ByteValue(val as i8)) } else if val >= SHORT_MIN && val <= SHORT_MAX { diff --git a/src/protocol/lexer.rs b/src/protocol/lexer.rs index 5b2dcbc031fe9e3cc91d414206cc344afffa8d74..8a42cf1d6bf8e956dfb682c1d763836d60e88c1e 100644 --- a/src/protocol/lexer.rs +++ b/src/protocol/lexer.rs @@ -58,6 +58,7 @@ fn is_integer_rest(x: Option<u8>) -> bool { || c >= b'A' && c <= b'F' || c == b'x' || c == b'X' + || c == b'o' } else { false } @@ -219,6 +220,48 @@ impl Lexer<'_> { } Ok(result) } + fn has_integer(&mut self) -> bool { + is_integer_start(self.source.next()) + } + fn consume_integer(&mut self) -> Result<i64, ParseError> { + let position = self.source.pos(); + let mut data = Vec::new(); + let mut next = self.source.next(); + while is_integer_rest(next) { + data.push(next.unwrap()); + self.source.consume(); + next = self.source.next(); + } + + let data_len = data.len(); + debug_assert_ne!(data_len, 0); + if data_len == 1 { + debug_assert!(data[0] >= b'0' && data[0] <= b'9'); + return Ok((data[0] - b'0') as i64); + } else { + // TODO: Fix, u64 should be supported as well + let parsed = if data[1] == b'b' { + let data = String::from_utf8_lossy(&data[2..]); + i64::from_str_radix(&data, 2) + } else if data[1] == b'o' { + let data = String::from_utf8_lossy(&data[2..]); + 
i64::from_str_radix(&data, 8) + } else if data[1] == b'x' { + let data = String::from_utf8_lossy(&data[2..]); + i64::from_str_radix(&data, 16) + } else { + // Assume decimal + let data = String::from_utf8_lossy(&data); + i64::from_str_radix(&data, 10) + }; + + if let Err(_err) = parsed { + return Err(ParseError::new(position, "Invalid integer constant")); + } + + Ok(parsed.unwrap()) + } + } // Statement keywords @@ -1025,17 +1068,11 @@ impl Lexer<'_> { self.source.consume(); value = Constant::Character(data); } else { - let mut data = Vec::new(); - let mut next = self.source.next(); - if !is_integer_start(next) { + if !self.has_integer() { return Err(self.source.error("Expected integer constant")); } - while is_integer_rest(next) { - data.push(next.unwrap()); - self.source.consume(); - next = self.source.next(); - } - value = Constant::Integer(data); + + value = Constant::Integer(self.consume_integer()?); } Ok(h.alloc_constant_expression(|this| ConstantExpression { this, position, value })) } @@ -1574,8 +1611,38 @@ impl Lexer<'_> { if !is_vchar(self.source.next()) { return Err(self.source.error("Expected pragma")); } - let value = self.consume_line()?; - Ok(h.alloc_pragma(|this| Pragma { this, position, value })) + if self.has_string(b"version") { + self.consume_string(b"version")?; + self.consume_whitespace(true)?; + if !self.has_integer() { + return Err(self.source.error("Expected integer constant")); + } + let version = self.consume_integer()?; + debug_assert!(version >= 0); + return Ok(h.alloc_pragma(|this| Pragma::Version(PragmaVersion{ + this, position, version: version as u64 + }))) + } else if self.has_string(b"module") { + self.consume_string(b"module")?; + self.consume_whitespace(true)?; + if !self.has_identifier() { + return Err(self.source.error("Expected identifier")); + } + let mut value = Vec::new(); + let mut ident = self.consume_ident()?; + value.append(&mut ident); + while self.has_string(b".") { + self.consume_string(b".")?; + value.push(b'.'); + 
ident = self.consume_ident()?; + value.append(&mut ident); + } + return Ok(h.alloc_pragma(|this| Pragma::Module(PragmaModule{ + this, position, value + }))); + } else { + return Err(self.source.error("Unknown pragma")); + } } fn has_import(&self) -> bool { self.has_keyword(b"import") @@ -1635,10 +1702,37 @@ impl Lexer<'_> { } } -// #[cfg(test)] -// mod tests { -// use crate::protocol::ast::Expression::*; -// use crate::protocol::{ast, lexer::*}; +#[cfg(test)] +mod tests { + use crate::protocol::ast::Expression::*; + use crate::protocol::{ast, lexer::*}; + + #[test] + fn test_pragmas() { + let mut h = Heap::new(); + let mut input = InputSource::from_string(" + #version 0o7777 + #module something.dot.separated + ").expect("new InputSource"); + let mut lex = Lexer::new(&mut input); + let lexed = lex.consume_protocol_description(&mut h) + .expect("lex input source"); + let root = &h[lexed]; + assert_eq!(root.pragmas.len(), 2); + let pv = &h[root.pragmas[0]]; + let pm = &h[root.pragmas[1]]; + + if let Pragma::Version(v) = pv { + assert_eq!(v.version, 0o7777) + } else { + assert!(false, "first pragma not version"); + } + if let Pragma::Module(m) = pm { + assert_eq!(m.value, b"something.dot.separated"); + } else { + assert!(false, "second pragma not version"); + } + } // #[test] // fn test_lowercase() { @@ -1755,4 +1849,4 @@ impl Lexer<'_> { // } // } // } -// } +} diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs index 392f0fa16177c236bccb99018a2a4f52753dba9c..1f368eea38d029456c89ac98ac19e4e4f8029e29 100644 --- a/src/protocol/mod.rs +++ b/src/protocol/mod.rs @@ -1,11 +1,15 @@ mod arena; -mod ast; +// mod ast; mod eval; pub(crate) mod inputsource; -mod lexer; +// mod lexer; mod library; mod parser; +// TODO: Remove when not benchmarking +pub(crate) mod ast; +pub(crate) mod lexer; + lazy_static::lazy_static! { /// Conveniently-provided protocol description initialized with a zero-length PDL string. 
/// Exposed to minimize repeated initializations of this common protocol description. diff --git a/src/protocol/parser/depth_visitor.rs b/src/protocol/parser/depth_visitor.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/protocol/parser/shallow_visitor.rs b/src/protocol/parser/shallow_visitor.rs new file mode 100644 index 0000000000000000000000000000000000000000..87ffebf5cc573b7a757ee6a7ad99f70cf736f9a3 --- /dev/null +++ b/src/protocol/parser/shallow_visitor.rs @@ -0,0 +1,10 @@ +use crate::protocol::ast::*; +use crate::protocol::input_source::*; +use crate::protocol::lexer::*; + +type Unit = (); +type VisitorResult = Result; + +trait ShallowVisitor: Sized { + fn visit_protocol_description(&mut self, h: &mut Heap, pd: RootId) -> +} \ No newline at end of file diff --git a/src/protocol/parser/visitor.rs b/src/protocol/parser/visitor.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391