Changeset - 392c59600687
[Not reviewed]
0 3 0
mh - 4 years ago 2021-05-31 11:37:55
contact@maxhenger.nl
Test non-ascii string literals and recursion
3 files changed with 57 insertions and 1 deletions:
0 comments (0 inline, 0 general)
src/protocol/input_source.rs
Show inline comments
 
use std::fmt;
 
use std::sync::{RwLock, RwLockReadGuard};
 
use std::fmt::Write;
 

	
 
/// A location inside an input source, expressed as a line number together
/// with an offset.
#[derive(Debug, Clone, Copy)]
 
pub struct InputPosition {
 
    /// Line number. `InputSource` starts counting at 1; the placeholder span
    /// produced by `InputSpan::new` uses 0.
    pub line: u32,
 
    /// Offset into the input. `InputSource::pos` fills this with its byte
    /// index into the input buffer, i.e. it is counted from the start of the
    /// input and not from the start of the line.
    pub offset: u32,
 
}
 

	
 
impl InputPosition {
 
    pub(crate) fn with_offset(&self, offset: u32) -> Self {
 
        InputPosition { line: self.line, offset: self.offset + offset }
 
    }
 
}
 

	
 
/// A region of an input source delimited by two positions.
#[derive(Debug, Clone, Copy)]
 
pub struct InputSpan {
 
    /// Position at which the span starts.
    pub begin: InputPosition,
 
    /// Position just past the span: `InputSource::section_at_span` slices the
    /// input up to, but not including, `end.offset`.
    pub end: InputPosition,
 
}
 

	
 
impl InputSpan {
    /// Creates a placeholder span whose begin and end both sit at line 0,
    /// offset 0.
    // This will only be used for builtin functions
    #[inline]
    pub fn new() -> InputSpan {
        let origin = InputPosition { line: 0, offset: 0 };
        Self::from_positions(origin, origin)
    }

    /// Creates a span running from `begin` to `end`.
    #[inline]
    pub fn from_positions(begin: InputPosition, end: InputPosition) -> Self {
        InputSpan { begin, end }
    }
}
 

	
 
/// Wrapper around source file with optional filename. Ensures that the file is
 
/// only scanned once.
 
pub struct InputSource {
 
    /// Name of the source. It is cloned into the `ErrorStatement`s built from
    /// this source; an empty string makes the error output omit the
    /// "in <filename>" part.
    pub(crate) filename: String,
 
    /// Raw bytes of the source text.
    pub(crate) input: Vec<u8>,
 
    // Iteration
 
    /// Current line number; starts at 1 and is incremented by `consume` for
    /// every `\n` it passes.
    line: u32,
 
    /// Index into `input` of the next byte to be consumed.
    offset: usize,
 
    // State tracking
 
    /// First error recorded through `set_error`; later errors do not
    /// overwrite it.
    pub(crate) had_error: Option<ParseError>,
 
    // The offset_lookup is built on-demand upon attempting to report an error.
 
    // Only one procedure will actually create the lookup, afterwards only read
 
    // locks will be held.
 
    /// Maps a line number to the offset of that line's first byte; filled in
    /// by `get_lookup`, empty until then.
    offset_lookup: RwLock<Vec<u32>>,
 
}
 

	
 
impl InputSource {
 
    /// Creates a source over `input`, positioned at offset 0 of line 1, with
    /// no recorded error and an empty (not yet built) offset lookup.
    pub fn new(filename: String, input: Vec<u8>) -> Self {
        let offset_lookup = RwLock::new(Vec::new());
        Self { filename, input, line: 1, offset: 0, had_error: None, offset_lookup }
    }
 

	
 
    /// Test-only convenience constructor: copies the bytes of `input` into a
    /// source whose filename is "test".
    #[cfg(test)]
    pub fn new_test(input: &str) -> Self {
        let filename = String::from("test");
        Self::new(filename, Vec::from(input.as_bytes()))
    }
 

	
 
    /// Returns the current cursor as an `InputPosition`. The byte offset is
    /// narrowed to `u32` with an `as` cast.
    #[inline]
    pub fn pos(&self) -> InputPosition {
        let offset = self.offset as u32;
        InputPosition { line: self.line, offset }
    }
 

	
 
    pub fn next(&self) -> Option<u8> {
 
        if self.offset < self.input.len() {
 
            Some(self.input[self.offset])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    pub fn lookahead(&self, offset: usize) -> Option<u8> {
 
        let offset_pos = self.offset + offset;
 
        if offset_pos < self.input.len() {
 
            Some(self.input[offset_pos])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    /// Returns the input bytes from `start.offset` up to, but not including,
    /// `end.offset`. Only the offsets are used; the line numbers are ignored.
    #[inline]
    pub fn section_at_pos(&self, start: InputPosition, end: InputPosition) -> &[u8] {
        let first = start.offset as usize;
        let past_last = end.offset as usize;
        &self.input[first..past_last]
    }
 

	
 
    #[inline]
 
    pub fn section_at_span(&self, span: InputSpan) -> &[u8] {
 
        &self.input[span.begin.offset as usize..span.end.offset as usize]
 
    }
 

	
 
    // Consumes the next character. Will check well-formedness of newlines: \r
 
    // must be followed by a \n, because this is used for error reporting. Will
 
    // not check for ascii-ness of the file, better left to a tokenizer.
 
    pub fn consume(&mut self) {
 
        match self.next() {
 
            Some(b'\r') => {
 
                if Some(b'\n') == self.lookahead(1) {
 
                    // Well formed file
 
                    self.offset += 1;
 
                } else {
 
                    // Not a well-formed file, pretend like we can continue
 
                    self.offset += 1;
 
                    self.set_error("Encountered carriage-feed without a following newline");
 
                }
 
            },
 
            Some(b'\n') => {
 
                self.line += 1;
 
                self.offset += 1;
 
            },
 
            Some(_) => {
 
                self.offset += 1;
 
            }
 
            None => {}
 
        }
 

	
 
        // Maybe we actually want to check this in release mode. Then again:
 
        // a 4 gigabyte source file... Really?
 
        debug_assert!(self.offset < u32::max_value() as usize);
 
    }
 

	
 
    fn set_error(&mut self, msg: &str) {
 
        if self.had_error.is_none() {
 
            self.had_error = Some(ParseError::new_error_str_at_pos(self, self.pos(), msg));
 
        }
 
    }
 

	
 
    fn get_lookup(&self) -> RwLockReadGuard<Vec<u32>> {
 
        // Once constructed the lookup always contains one element. We use this
 
        // to see if it is constructed already.
 
        {
 
            let lookup = self.offset_lookup.read().unwrap();
 
            if !lookup.is_empty() {
 
                return lookup;
 
            }
 
        }
 

	
 
        // Lookup was not constructed yet
 
        let mut lookup = self.offset_lookup.write().unwrap();
 
        if !lookup.is_empty() {
 
            // Somebody created it before we had the chance
 
            drop(lookup);
 
            let lookup = self.offset_lookup.read().unwrap();
 
            return lookup;
 
        }
 

	
 
        // Build the line number (!) to offset lookup, so offset by 1. We 
 
        // assume the entire source file is scanned (most common case) for
 
        // preallocation.
 
        lookup.reserve(self.line as usize + 2);
 
        lookup.push(0); // line 0: never used
 
        lookup.push(0); // first line: first character
 

	
 
        for char_idx in 0..self.input.len() {
 
            if self.input[char_idx] == b'\n' {
 
                lookup.push(char_idx as u32 + 1);
 
            }
 
        }
 

	
 
        lookup.push(self.input.len() as u32 + 1); // for lookup_line_end, intentionally adding one character
 
        debug_assert_eq!(self.line as usize + 2, lookup.len(), "remove me: i am a testing assert and sometimes invalid");
 

	
 
        // Return created lookup
 
        drop(lookup);
 
        let lookup = self.offset_lookup.read().unwrap();
 
        return lookup;
 
    }
 

	
 
    /// Retrieves offset at which line starts (right after newline)
 
    fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        lookup[line_number as usize]
 
    }
 

	
 
    /// Retrieves offset at which line ends (at the newline character or the
 
    /// preceding carriage feed for \r\n-encoded newlines)
 
    fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        let offset = lookup[(line_number + 1) as usize] - 1;
 
        let offset_usize = offset as usize;
 

	
 
        // Compensate for newlines and a potential carriage feed. Note that the
 
        // end position is exclusive. So we only need to compensate for a
 
        // "\r\n"
 
        if offset_usize > 0 && offset_usize < self.input.len() && self.input[offset_usize] == b'\n' && self.input[offset_usize - 1] == b'\r' {
 
            offset - 1
 
        } else {
 
            offset
 
        }
 
    }
 
}
 

	
 
/// Severity of an `ErrorStatement`. The `Display` implementation of
/// `ErrorStatement` uses it to pick the prefix written before the message.
#[derive(Debug)]
 
pub enum StatementKind {
 
    /// Rendered with the prefix " INFO: ".
    Info,
 
    /// Rendered with the prefix "ERROR: ".
    Error
 
}
 

	
 
/// Whether the source excerpt attached to an `ErrorStatement` covers one line
/// or several.
#[derive(Debug)]
 
pub enum ContextKind {
 
    /// The statement starts and ends on the same line; its location is
    /// rendered as " at line:column".
    SingleLine,
 
    /// The statement's begin and end lie on different lines; its location is
    /// rendered as " from line:column to line:column".
    MultiLine,
 
}
 

	
 
/// A single message (error or info) together with the location and the
/// excerpt of source text it refers to.
#[derive(Debug)]
 
pub struct ErrorStatement {
 
    /// Severity; selects the prefix used when displaying the statement.
    pub(crate) statement_kind: StatementKind,
 
    /// Whether `context` spans a single line or multiple lines.
    pub(crate) context_kind: ContextKind,
 
    /// Line on which the reported region starts.
    pub(crate) start_line: u32,
 
    /// Column at which the reported region starts, computed as the byte
    /// offset from the start of the line plus one (so the first column is 1).
    pub(crate) start_column: u32,
 
    /// Line on which the reported region ends.
    pub(crate) end_line: u32,
 
    /// Column at which the reported region ends, computed like
    /// `start_column`. For position-based statements it is `start_column + 1`.
    pub(crate) end_column: u32,
 
    /// Filename cloned from the `InputSource` the statement was built from.
    pub(crate) filename: String,
 
    /// Text of the source line(s) the statement refers to, decoded from the
    /// input bytes with lossy UTF-8 conversion.
    pub(crate) context: String,
 
    /// The message itself.
    pub(crate) message: String,
 
}
 

	
 
impl ErrorStatement {
 
    fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self {
 
        // Seek line start and end
 
        let line_start = source.lookup_line_start_offset(position.line);
 
        let line_end = source.lookup_line_end_offset(position.line);
 
        let context = Self::create_context(source, line_start as usize, line_end as usize);
 
        debug_assert!(position.offset >= line_start);
 
        let column = position.offset - line_start + 1;
 

	
 
        Self{
 
            statement_kind,
 
            context_kind: ContextKind::SingleLine,
 
            start_line: position.line,
 
            start_column: column,
 
            end_line: position.line,
 
            end_column: column + 1,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    pub(crate) fn from_source_at_span(statement_kind: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self {
 
        debug_assert!(span.end.line >= span.begin.line);
 
        debug_assert!(span.end.offset >= span.begin.offset);
 

	
 
        let first_line_start = source.lookup_line_start_offset(span.begin.line);
 
        let last_line_start = source.lookup_line_start_offset(span.end.line);
 
        let last_line_end = source.lookup_line_end_offset(span.end.line);
 
        let context = Self::create_context(source, first_line_start as usize, last_line_end as usize);
 
        debug_assert!(span.begin.offset >= first_line_start);
 
        let start_column = span.begin.offset - first_line_start + 1;
 
        let end_column = span.end.offset - last_line_start + 1;
 

	
 
        let context_kind = if span.begin.line == span.end.line {
 
            ContextKind::SingleLine
 
        } else {
 
            ContextKind::MultiLine
 
        };
 

	
 
        Self{
 
            statement_kind,
 
            context_kind,
 
            start_line: span.begin.line,
 
            start_column,
 
            end_line: span.end.line,
 
            end_column,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    /// Produces context from source
 
    fn create_context(source: &InputSource, start: usize, end: usize) -> String {
 
        let context_raw = &source.input[start..end];
 
        String::from_utf8_lossy(context_raw).to_string()
 
    }
 
}
 

	
 
impl fmt::Display for ErrorStatement {
 
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 
        // Write kind of statement and message
 
        match self.statement_kind {
 
            StatementKind::Info => f.write_str(" INFO: ")?,
 
            StatementKind::Error => f.write_str("ERROR: ")?,
 
        }
 
        f.write_str(&self.message)?;
 
        f.write_char('\n')?;
 

	
 
        // Write originating file/line/column
 
        f.write_str(" +- ")?;
 
        if !self.filename.is_empty() {
 
            write!(f, "in {} ", self.filename)?;
 
        }
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => writeln!(f, " at {}:{}", self.start_line, self.start_column),
 
            ContextKind::MultiLine => writeln!(
 
                f, " from {}:{} to {}:{}",
 
                self.start_line, self.start_column, self.end_line, self.end_column
 
            )
 
        }?;
 

	
 
        // Helper function for writing context: converting tabs into 4 spaces
 
        // (oh, the controversy!) and creating an annotated line
 
        /// Appends `source` to `target`, replacing every tab character by
        /// four spaces. All other characters are copied as they are.
        fn transform_context(source: &str, target: &mut String) {
            for ch in source.chars() {
                match ch {
                    '\t' => target.push_str("    "),
                    other => target.push(other),
                }
            }
        }
 

	
 
        /// Appends to `target` one `extend_char` for every character of
        /// `source` that lies within the columns `first_col..last_col`, or
        /// four of them for a tab, matching the tab expansion applied to the
        /// context line.
        ///
        /// Columns are 1-based *byte* offsets into `source` and `last_col` is
        /// exclusive. A multi-byte character is therefore annotated once, as
        /// soon as any of its bytes falls inside the column range.
        ///
        /// When `first_col` points just past the end of `source` (a message
        /// placed at the newline character) a single `extend_char` is written.
        fn extend_annotation(first_col: u32, last_col: u32, source: &str, target: &mut String, extend_char: char) {
            debug_assert!(first_col > 0 && last_col > first_col);

            let first_idx = first_col as usize - 1;
            let last_idx = last_col as usize - 1;

            // If the first index exceeds the size of the context then we should
            // have a message placed at the newline character
            if first_idx >= source.len() {
                // If any of these fail then the logic behind reporting errors
                // is incorrect.
                debug_assert_eq!(first_idx, source.len());
                debug_assert_eq!(first_idx + 1, last_idx);
                target.push(extend_char);
                return;
            }

            // Walk the characters by byte offset: the column indices are byte
            // offsets, so counting characters instead would misplace the
            // annotation as soon as the line holds a multi-byte character.
            for (byte_idx, ch) in source.char_indices() {
                if byte_idx >= last_idx {
                    break;
                }
                if byte_idx + ch.len_utf8() <= first_idx {
                    // Character ends before the annotated range begins
                    continue;
                }

                let width = if ch == '\t' { 4 } else { 1 };
                target.extend(std::iter::repeat(extend_char).take(width));
            }
        }
 

	
 
        // Write source context
 
        writeln!(f, " | ")?;
 

	
 
        let mut context = String::with_capacity(128);
 
        let mut annotation = String::with_capacity(128);
 

	
 
        match self.context_kind {
 
            ContextKind::SingleLine => {
 
                // Write single line of context with indicator for the offending
 
                // span underneath.
 
                context.push_str(" |  ");
 
                transform_context(&self.context, &mut context);
src/protocol/tests/eval_calls.rs
Show inline comments
 
use super::*;
 

	
 
/// Evaluates simple (non-recursive) function calls and checks the value
/// returned by `foo`.
#[test]
 
fn test_function_call() {
 
    // Callee is given a literal argument: 5 + 2 must evaluate to 7.
    Tester::new_single_source_expect_ok("with literal arg", "
 
    func add_two(u32 value) -> u32 {
 
        return value + 2;
 
    }
 
    func foo() -> u32 {
 
        return add_two(5);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(7)));
 
    });
 

	
 
    // Callee modifies its parameter: the caller's variable must still hold 5
    // afterwards, while the returned value is 7.
    Tester::new_single_source_expect_ok("with variable arg", "
 
    func add_two(u32 value) -> u32 {
 
        value += 1;
 
        return value + 1;
 
    }
 
    func foo() -> bool {
 
        auto initial = 5;
 
        auto result = add_two(initial);
 
        return initial == 5 && result == 7;
 
    }").for_function("foo", |f| {
 
        f.call_ok(Some(Value::Bool(true)));
 
    });
 
}
 

	
 
/// Evaluates recursive functions (direct, branching and mutual recursion) and
/// checks the value returned by `foo`.
#[test]
 
fn test_recursion() {
 
    // Single-chain: each call recurses at most once. 10! = 3628800.
    Tester::new_single_source_expect_ok("factorial", "
 
    func horribly_slow_factorial(u32 term) -> u32 {
 
        if (term <= 0) { return 1; }
 

	
 
        return term * horribly_slow_factorial(term - 1);
 
    }
 
    func foo() -> u32 {
 
        return horribly_slow_factorial(10);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(3628800)));
 
    });
 

	
 
    // Multi-chain horribleness: each call recurses twice. With terms 0 and 1
    // both equal to 1, term 10 is 89.
    Tester::new_single_source_expect_ok("fibonacci", "
 
    func horribly_slow_fibo(u32 term) -> u32 {
 
        if (term <= 1) {
 
            return 1;
 
        }
 
        return horribly_slow_fibo(term - 2) + horribly_slow_fibo(term - 1);
 
    }
 
    func foo() -> u32 {
 
        return horribly_slow_fibo(10);
 
    }").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(89)));
 
    });
 

	
 
    // Mutual recursion (in a contrived fashion, of course): two functions
    // calling each other while counting the iterations of a Collatz sequence
    // that starts at 19.
    Tester::new_single_source_expect_ok("mutual recursion", "
 
    func collatz_even(u32 iter, u32 value) -> u32 {
 
        value = value / 2;
 
        if (value % 2 == 0) return collatz_even(iter + 1, value);
 
        else                return collatz_odd(iter + 1, value);
 
    }
 
    func collatz_odd(u32 iter, u32 value) -> u32 {
 
        if (value <= 1) return iter;
 

	
 
        value = 3 * value + 1;
 
        if (value % 2 == 0) return collatz_even(iter + 1, value);
 
        else                return collatz_odd(iter + 1, value);
 
    }
 
    func foo() -> u32 {
 
        return collatz_odd(1, 19);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(21)));
 
    });
 
}
 

	
 
/// Regression test: evaluation must step through `if` and `while` statements
/// with empty bodies and still reach the `return` that follows them.
#[test]
 
fn test_empty_blocks() {
 
    // Yes this is silly, but I managed to make this a bug before
 
    Tester::new_single_source_expect_ok("traversing empty statements", "
 
    func foo() -> u32 {
 
        auto val = 128;
 
        if (true) {}
 
        while (false) {}
 
        return val;
 
    }
 
    ").for_function("foo", |f| { f.call_ok(Some(Value::UInt32(128))); });
 
}
 
\ No newline at end of file
src/protocol/tests/parser_literals.rs
Show inline comments
 
use super::*;
 

	
 
/// Checks parsing of binary integer literals (`0b...`): accepted forms and
/// the message expected for malformed ones.
#[test]
 
fn test_binary_literals() {
 
    // Well-formed literals, with and without `_` separators, assigned to
    // variables of several integer widths. Only required to compile.
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> u32 {
 
            u8  v1 = 0b0100_0010;
 
            u16 v2 = 0b10101010;
 
            u32 v3 = 0b10000001_01111110;
 
            u64 v4 = 0b1001_0110_1001_0110;
 

	
 
            return 0b10110;
 
        }
 
    ");
 

	
 
    // A digit other than 0 or 1 (the '2') inside the literal.
    Tester::new_single_source_expect_err("invalid character", "
 
        func test() -> u32 {
 
            return 0b10011001_10012001;
 
        }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    // The `0b` prefix without any digits.
    Tester::new_single_source_expect_err("no characters", "
 
        func test() -> u32 { return 0b; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    // The `0b` prefix followed by separators only.
    Tester::new_single_source_expect_err("only separators", "
 
        func test() -> u32 { return 0b____; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 
}
 

	
 
/// Checks parsing of string literals: valid literals (including escape
/// sequences and the empty string) and the errors expected for unterminated
/// literals, unsupported escapes and non-ASCII content.
#[test]
 
fn test_string_literals() {
 
    // Valid literals; each variable must end up with concrete type "string".
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> string {
 
            auto v1 = \"Hello, world!\";
 
            auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing
 
            auto v3 = \"\";
 
            return \"No way, dude!\";
 
        }
 
    ").for_function("test", |f| { f
 
        .for_variable("v1", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v2", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v3", |v| { v.assert_concrete_type("string"); });
 
    });
 

	
 
    // Opening quote that is never closed: a single "unterminated" error is
    // expected.
    Tester::new_single_source_expect_err("unterminated simple", "
 
        func test() -> string { return \"'; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    // The would-be closing quote is escaped, so the literal is still
    // unterminated.
    Tester::new_single_source_expect_err("unterminated with preceding escaped", "
 
        func test() -> string { return \"\\\"; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"\\")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    // Backslash followed by a character that is not a supported escape.
    Tester::new_single_source_expect_err("invalid escaped character", "
 
        func test() -> string { return \"\\y\"; }
 
    ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); });
 

	
 
    // Not sure if this should always be in here...
 
    Tester::new_single_source_expect_err("non-ASCII string", "
 
        func test() -> string { return \"💧\"; }
 
    ").error(|e| { e.assert_msg_has(0, "non-ASCII character in string literal"); });
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)