CSY/reowolf Changeset - 392c59600687 · Centrum Wiskunde & Informatica (CWI)

Changeset - 392c59600687

Parent rev.

Child rev.

[Not reviewed]

0 3 0

mh - 4 years ago 2021-05-31 11:37:55
contact@maxhenger.nl

Test non-ascii string literals and recursion

3 files changed with 57 insertions and 1 deletions:

src/protocol/input_source.rs

src/protocol/tests/eval_calls.rs

src/protocol/tests/parser_literals.rs

0 comments (0 inline, 0 general)

src/protocol/input_source.rs

➞

Show inline comments

 use std::fmt;
 use std::sync::{RwLock, RwLockReadGuard};
 use std::fmt::Write;
 /// A location in the input, expressed as a line number plus an offset.
 #[derive(Debug, Clone, Copy)]
 pub struct InputPosition {
     /// Line number. `InputSource` starts counting at 1; `InputSpan::new` uses
     /// line 0 for builtins.
     pub line: u32,
     /// Offset into the input; `InputSource` uses it as an index into its byte
     /// buffer.
     pub offset: u32,
+}
 impl InputPosition {
     /// Returns a copy of this position that stays on the same line but has
     /// its offset advanced by `offset`.
     pub(crate) fn with_offset(&self, offset: u32) -> Self {
         let shifted = self.offset + offset;
         Self { line: self.line, offset: shifted }
+    }
+}
 /// A range in the input delimited by two positions.
 #[derive(Debug, Clone, Copy)]
 pub struct InputSpan {
     /// Position where the span starts.
     pub begin: InputPosition,
     /// Position where the span stops; `InputSource::section_at_span` treats
     /// its offset as the exclusive end of the byte range.
     pub end: InputPosition,
+}
 impl InputSpan {
     // This will only be used for builtin functions
     #[inline]
     pub fn new() -> InputSpan {
         InputSpan{ begin: InputPosition{ line: 0, offset: 0 }, end: InputPosition{ line: 0, offset: 0 }}
+    }
     #[inline]
     pub fn from_positions(begin: InputPosition, end: InputPosition) -> Self {
         Self { begin, end }
+    }
+}
 /// Wrapper around source file with optional filename. Ensures that the file is
 /// only scanned once.
 pub struct InputSource {
     /// Name of the source; the error display omits it when it is empty.
     pub(crate) filename: String,
     /// Raw bytes of the source.
     pub(crate) input: Vec<u8>,
     // Iteration
     /// Line number of the cursor; starts at 1 and is incremented for every
     /// consumed `\n`.
     line: u32,
     /// Index of the cursor into `input`.
     offset: usize,
     // State tracking
     /// The first error recorded through `set_error`, if any.
     pub(crate) had_error: Option<ParseError>,
     // The offset_lookup is built on-demand upon attempting to report an error.
     // Only one procedure will actually create the lookup, afterwards only read
     // locks will be held.
     /// Maps a line number to the offset at which that line starts; empty until
     /// it is first requested.
     offset_lookup: RwLock<Vec<u32>>,
+}
 impl InputSource {
     /// Creates a source over `input` with the cursor on line 1 at offset 0, no
     /// recorded error and an empty (not yet built) line-offset lookup.
     pub fn new(filename: String, input: Vec<u8>) -> Self {
         Self{
             filename,
             input,
             line: 1,
             offset: 0,
             had_error: None,
             offset_lookup: RwLock::new(Vec::new()),
+        }
+    }
     /// Test-only constructor: copies the bytes of `input` into a source named
     /// "test".
     #[cfg(test)]
     pub fn new_test(input: &str) -> Self {
         let bytes = Vec::from(input.as_bytes());
         return Self::new(String::from("test"), bytes)
+    }
     /// Returns the current cursor position (line and offset).
     #[inline]
     pub fn pos(&self) -> InputPosition {
         InputPosition { line: self.line, offset: self.offset as u32 }
+    }
     /// Returns the byte under the cursor without consuming it, or `None` once
     /// the cursor has reached the end of the input.
     pub fn next(&self) -> Option<u8> {
         if self.offset < self.input.len() {
             Some(self.input[self.offset])
         } else {
             None
+        }
+    }
     /// Returns the byte `offset` positions past the cursor, or `None` if that
     /// lies beyond the end of the input.
     pub fn lookahead(&self, offset: usize) -> Option<u8> {
         let offset_pos = self.offset + offset;
         if offset_pos < self.input.len() {
             Some(self.input[offset_pos])
         } else {
             None
+        }
+    }
     /// Returns the input bytes in `start.offset..end.offset`. Panics if that is
     /// not a valid range into the input.
     #[inline]
     pub fn section_at_pos(&self, start: InputPosition, end: InputPosition) -> &[u8] {
         &self.input[start.offset as usize..end.offset as usize]
+    }
     /// Returns the input bytes covered by `span` (end offset exclusive). Panics
     /// if that is not a valid range into the input.
     #[inline]
     pub fn section_at_span(&self, span: InputSpan) -> &[u8] {
         &self.input[span.begin.offset as usize..span.end.offset as usize]
+    }
     // Consumes the next character. Will check well-formedness of newlines: \r
     // must be followed by a \n, because this is used for error reporting. Will
     // not check for ascii-ness of the file, better left to a tokenizer.
     // At the end of the input this is a no-op.
     pub fn consume(&mut self) {
         match self.next() {
             Some(b'\r') => {
                 if Some(b'\n') == self.lookahead(1) {
                     // Well formed file
                     self.offset += 1;
                 } else {
                     // Not a well-formed file, pretend like we can continue
                     self.offset += 1;
                     self.set_error("Encountered carriage-feed without a following newline");
+                }
             },
             Some(b'\n') => {
                 self.line += 1;
                 self.offset += 1;
             },
             Some(_) => {
                 self.offset += 1;
+            }
             None => {}
+        }
         // Maybe we actually want to check this in release mode. Then again:
         // a 4 gigabyte source file... Really?
         debug_assert!(self.offset < u32::max_value() as usize);
+    }
     /// Records `msg` as an error at the current position. Only the first error
     /// is kept; later calls leave `had_error` untouched.
     fn set_error(&mut self, msg: &str) {
         if self.had_error.is_none() {
             self.had_error = Some(ParseError::new_error_str_at_pos(self, self.pos(), msg));
+        }
+    }
     /// Returns a read guard on the line-number-to-offset lookup, building the
     /// lookup first if it is still empty.
     fn get_lookup(&self) -> RwLockReadGuard<Vec<u32>> {
         // Once constructed the lookup always contains at least one element.
         // We use this to see if it is constructed already.
+        {
             let lookup = self.offset_lookup.read().unwrap();
             if !lookup.is_empty() {
                 return lookup;
+            }
+        }
         // Lookup was not constructed yet
         let mut lookup = self.offset_lookup.write().unwrap();
         if !lookup.is_empty() {
             // Somebody created it before we had the chance
             drop(lookup);
             let lookup = self.offset_lookup.read().unwrap();
             return lookup;
+        }
         // Build the line number (!) to offset lookup, so offset by 1. The
         // cursor's current line is only a capacity hint: the lookup may be
         // requested (e.g. to report an error) before the whole input has been
         // consumed, so the final number of entries is not required to match
         // it.
         lookup.reserve(self.line as usize + 2);
         lookup.push(0); // line 0: never used
         lookup.push(0); // first line: first character
         for char_idx in 0..self.input.len() {
             if self.input[char_idx] == b'\n' {
                 lookup.push(char_idx as u32 + 1);
+            }
+        }
         lookup.push(self.input.len() as u32 + 1); // for lookup_line_end, intentionally adding one character
         // Return created lookup
         drop(lookup);
         let lookup = self.offset_lookup.read().unwrap();
         return lookup;
+    }
     /// Retrieves offset at which line starts (right after newline)
     fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
         let lookup = self.get_lookup();
         lookup[line_number as usize]
+    }
     /// Retrieves offset at which line ends (at the newline character or the
     /// preceding carriage return for \r\n-encoded newlines)
     fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
         let lookup = self.get_lookup();
         // The next line's start offset minus one: the index of this line's
         // `\n`, or the input length for the last line.
         let offset = lookup[(line_number + 1) as usize] - 1;
         let offset_usize = offset as usize;
         // Compensate for newlines and a potential carriage return. Note that
         // the end position is exclusive. So we only need to compensate for a
         // "\r\n"
         if offset_usize > 0 && offset_usize < self.input.len() && self.input[offset_usize] == b'\n' && self.input[offset_usize - 1] == b'\r' {
             offset - 1
         } else {
             offset
+        }
+    }
+}
 /// Severity of an `ErrorStatement`; selects the prefix written when the
 /// statement is displayed.
 #[derive(Debug)]
 pub enum StatementKind {
     /// Displayed with the prefix " INFO: ".
     Info,
     /// Displayed with the prefix "ERROR: ".
     Error
+}
 /// Whether the source context of an `ErrorStatement` covers one line or
 /// several.
 #[derive(Debug)]
 pub enum ContextKind {
     /// The statement starts and ends on the same line.
     SingleLine,
     /// The statement starts and ends on different lines.
     MultiLine,
+}
 /// One message about a location in a source file, together with the source
 /// text it refers to.
 #[derive(Debug)]
 pub struct ErrorStatement {
     /// Severity of the statement.
     pub(crate) statement_kind: StatementKind,
     /// Whether `context` holds a single line or multiple lines.
     pub(crate) context_kind: ContextKind,
     /// Line on which the reported range starts.
     pub(crate) start_line: u32,
     /// Column at which the reported range starts; the first column of a line
     /// is 1.
     pub(crate) start_column: u32,
     /// Line on which the reported range ends.
     pub(crate) end_line: u32,
     /// Column at which the reported range ends, counted like `start_column`.
     pub(crate) end_column: u32,
     /// Copy of the source's filename.
     pub(crate) filename: String,
     /// Source text from the start of the first line up to the end of the last
     /// line, decoded lossily as UTF-8.
     pub(crate) context: String,
     /// Human-readable message.
     pub(crate) message: String,
+}
 impl ErrorStatement {
     fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self {
         // Seek line start and end
         let line_start = source.lookup_line_start_offset(position.line);
         let line_end = source.lookup_line_end_offset(position.line);
         let context = Self::create_context(source, line_start as usize, line_end as usize);
         debug_assert!(position.offset >= line_start);
         let column = position.offset - line_start + 1;
         Self{
             statement_kind,
             context_kind: ContextKind::SingleLine,
             start_line: position.line,
             start_column: column,
             end_line: position.line,
             end_column: column + 1,
             filename: source.filename.clone(),
             context,
             message,
+        }
+    }
     pub(crate) fn from_source_at_span(statement_kind: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self {
         debug_assert!(span.end.line >= span.begin.line);
         debug_assert!(span.end.offset >= span.begin.offset);
         let first_line_start = source.lookup_line_start_offset(span.begin.line);
         let last_line_start = source.lookup_line_start_offset(span.end.line);
         let last_line_end = source.lookup_line_end_offset(span.end.line);
         let context = Self::create_context(source, first_line_start as usize, last_line_end as usize);
         debug_assert!(span.begin.offset >= first_line_start);
         let start_column = span.begin.offset - first_line_start + 1;
         let end_column = span.end.offset - last_line_start + 1;
         let context_kind = if span.begin.line == span.end.line {
             ContextKind::SingleLine
         } else {
             ContextKind::MultiLine
         };
         Self{
             statement_kind,
             context_kind,
             start_line: span.begin.line,
             start_column,
             end_line: span.end.line,
             end_column,
             filename: source.filename.clone(),
             context,
             message,
+        }
+    }
     /// Produces context from source
     fn create_context(source: &InputSource, start: usize, end: usize) -> String {
         let context_raw = &source.input[start..end];
         String::from_utf8_lossy(context_raw).to_string()
+    }
+}
 impl fmt::Display for ErrorStatement {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // Write kind of statement and message
         match self.statement_kind {
             StatementKind::Info => f.write_str(" INFO: ")?,
             StatementKind::Error => f.write_str("ERROR: ")?,
+        }
         f.write_str(&self.message)?;
         f.write_char('\n')?;
         // Write originating file/line/column
         f.write_str(" +- ")?;
         if !self.filename.is_empty() {
             write!(f, "in {} ", self.filename)?;
+        }
         match self.context_kind {
             ContextKind::SingleLine => writeln!(f, " at {}:{}", self.start_line, self.start_column),
             ContextKind::MultiLine => writeln!(
                 f, " from {}:{} to {}:{}",
                 self.start_line, self.start_column, self.end_line, self.end_column
+            )
         }?;
         // Helper function for writing context: converting tabs into 4 spaces
         // (oh, the controversy!) and creating an annotated line
         fn transform_context(source: &str, target: &mut String) {
             for char in source.chars() {
                 if char == '\t' {
                     target.push_str("    ");
                 } else {
                     target.push(char);
+                }
+            }
+        }
         fn extend_annotation(first_col: u32, last_col: u32, source: &str, target: &mut String, extend_char: char) {
             debug_assert!(first_col > 0 && last_col > first_col);
             // If the first index exceeds the size of the context then we should
             // have a message placed at the newline character
             let first_idx = first_col as usize - 1;
             let last_idx = last_col as usize - 1;
             if first_idx >= source.len() {
                 // If any of these fail then the logic behind reporting errors
                 // is incorrect.
                 debug_assert_eq!(first_idx, source.len());
                 debug_assert_eq!(first_idx + 1, last_idx);
                 target.push(extend_char);
             } else {
                 for (char_idx, char) in source.chars().enumerate().skip(first_idx) {
                     if char_idx == last_idx as usize {
                         break;
+                    }
                     if char == '\t' {
                         for _ in 0..4 { target.push(extend_char); }
                     } else {
                         target.push(extend_char);
+                    }
+                }
+            }
+        }
         // Write source context
         writeln!(f, " | ")?;
         let mut context = String::with_capacity(128);
         let mut annotation = String::with_capacity(128);
         match self.context_kind {
             ContextKind::SingleLine => {
                 // Write single line of context with indicator for the offending
                 // span underneath.
                 context.push_str(" |  ");
                 transform_context(&self.context, &mut context);

src/protocol/tests/eval_calls.rs

➞

Show inline comments

 use super::*;
 // Calls one function from another, once with a literal argument and once
 // with a variable argument.
 #[test]
 fn test_function_call() {
     // Literal argument: `foo` is expected to yield add_two(5) == 7.
     Tester::new_single_source_expect_ok("with literal arg", "
     func add_two(u32 value) -> u32 {
         return value + 2;
+    }
     func foo() -> u32 {
         return add_two(5);
+    }
     ").for_function("foo", |f| {
         f.call_ok(Some(Value::UInt32(7)));
     });
     // Variable argument: the callee assigns to its parameter, and `foo` only
     // yields true if the caller's `initial` is still 5 afterwards.
     Tester::new_single_source_expect_ok("with variable arg", "
     func add_two(u32 value) -> u32 {
         value += 1;
         return value + 1;
+    }
     func foo() -> bool {
         auto initial = 5;
         auto result = add_two(initial);
         return initial == 5 && result == 7;
     }").for_function("foo", |f| {
         f.call_ok(Some(Value::Bool(true)));
     });
+}
 // Exercises recursive calls: a function calling itself once, a function
 // calling itself twice, and two functions calling each other.
 #[test]
 fn test_recursion() {
     // Single-chain: 10! == 3628800
     Tester::new_single_source_expect_ok("factorial", "
     func horribly_slow_factorial(u32 term) -> u32 {
         if (term <= 0) { return 1; }
         return term * horribly_slow_factorial(term - 1);
+    }
     func foo() -> u32 {
         return horribly_slow_factorial(10);
+    }
     ").for_function("foo", |f| {
         f.call_ok(Some(Value::UInt32(3628800)));
     });
     // Multi-chain horribleness: terms 0 and 1 both yield 1, so term 10
     // yields 89
     Tester::new_single_source_expect_ok("fibonacci", "
     func horribly_slow_fibo(u32 term) -> u32 {
         if (term <= 1) {
             return 1;
+        }
         return horribly_slow_fibo(term - 2) + horribly_slow_fibo(term - 1);
+    }
     func foo() -> u32 {
         return horribly_slow_fibo(10);
     }").for_function("foo", |f| {
         f.call_ok(Some(Value::UInt32(89)));
     });
     // Mutual recursion (in a contrived fashion, of course): `iter` is
     // incremented on every call and returned once the value reaches 1, which
     // for a starting value of 19 gives 21
     Tester::new_single_source_expect_ok("mutual recursion", "
     func collatz_even(u32 iter, u32 value) -> u32 {
         value = value / 2;
         if (value % 2 == 0) return collatz_even(iter + 1, value);
         else                return collatz_odd(iter + 1, value);
+    }
     func collatz_odd(u32 iter, u32 value) -> u32 {
         if (value <= 1) return iter;
         value = 3 * value + 1;
         if (value % 2 == 0) return collatz_even(iter + 1, value);
         else                return collatz_odd(iter + 1, value);
+    }
     func foo() -> u32 {
         return collatz_odd(1, 19);
+    }
     ").for_function("foo", |f| {
         f.call_ok(Some(Value::UInt32(21)));
     });
+}
 // Regression test: a function containing an `if` and a `while` with empty
 // bodies is still expected to yield the value of `val`.
 #[test]
 fn test_empty_blocks() {
     // Yes this is silly, but I managed to make this a bug before
     Tester::new_single_source_expect_ok("traversing empty statements", "
     func foo() -> u32 {
         auto val = 128;
         if (true) {}
         while (false) {}
         return val;
+    }
     ").for_function("foo", |f| { f.call_ok(Some(Value::UInt32(128))); });
+}
@@ \ No newline at end of file @@

src/protocol/tests/parser_literals.rs

➞

Show inline comments

 use super::*;
 // Binary integer literals: accepted forms (with and without `_` separators)
 // and three malformed forms that must each produce an "incorrectly formatted
 // binary number" message.
 #[test]
 fn test_binary_literals() {
     Tester::new_single_source_expect_ok("valid", "
         func test() -> u32 {
             u8  v1 = 0b0100_0010;
             u16 v2 = 0b10101010;
             u32 v3 = 0b10000001_01111110;
             u64 v4 = 0b1001_0110_1001_0110;
             return 0b10110;
+        }
     ");
     // A digit other than 0 or 1 inside the literal
     Tester::new_single_source_expect_err("invalid character", "
         func test() -> u32 {
             return 0b10011001_10012001;
+        }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
     // Prefix without any digits
     Tester::new_single_source_expect_err("no characters", "
         func test() -> u32 { return 0b; }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
     // Prefix followed by separators only
     Tester::new_single_source_expect_err("only separators", "
         func test() -> u32 { return 0b____; }
     ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
+}
 // String literals: valid forms (plain, escape sequences, empty) and the
 // error cases for unterminated strings, unsupported escapes and non-ASCII
 // content.
 #[test]
 fn test_string_literals() {
     // Every variable initialised from a string literal must get the concrete
     // type "string"
     Tester::new_single_source_expect_ok("valid", "
         func test() -> string {
             auto v1 = \"Hello, world!\";
             auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing
             auto v3 = \"\";
             return \"No way, dude!\";
+        }
     ").for_function("test", |f| { f
         .for_variable("v1", |v| { v.assert_concrete_type("string"); })
         .for_variable("v2", |v| { v.assert_concrete_type("string"); })
         .for_variable("v3", |v| { v.assert_concrete_type("string"); });
     });
     // Opening quote without a closing one: exactly one error, located at the
     // opening quote
     Tester::new_single_source_expect_err("unterminated simple", "
         func test() -> string { return \"'; }
     ").error(|e| { e
         .assert_num(1)
         .assert_occurs_at(0, "\"")
         .assert_msg_has(0, "unterminated");
     });
     // The only other quote is escaped, so the string is still unterminated
     Tester::new_single_source_expect_err("unterminated with preceding escaped", "
         func test() -> string { return \"\\\"; }
     ").error(|e| { e
         .assert_num(1)
         .assert_occurs_at(0, "\"\\")
         .assert_msg_has(0, "unterminated");
     });
     Tester::new_single_source_expect_err("invalid escaped character", "
         func test() -> string { return \"\\y\"; }
     ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); });
     // Not sure if this should always be in here...
     Tester::new_single_source_expect_err("non-ASCII string", "
         func test() -> string { return \"💧\"; }
     ").error(|e| { e.assert_msg_has(0, "non-ASCII character in string literal"); });
+}
@@ \ No newline at end of file @@

0 comments (0 inline, 0 general)