Changeset - 392c59600687
[Not reviewed]
0 3 0
mh - 4 years ago 2021-05-31 11:37:55
contact@maxhenger.nl
Test non-ascii string literals and recursion
3 files changed with 57 insertions and 1 deletions:
0 comments (0 inline, 0 general)
src/protocol/input_source.rs
Show inline comments
 
@@ -75,193 +75,192 @@ impl InputSource {
 
    pub fn next(&self) -> Option<u8> {
 
        if self.offset < self.input.len() {
 
            Some(self.input[self.offset])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    pub fn lookahead(&self, offset: usize) -> Option<u8> {
 
        let offset_pos = self.offset + offset;
 
        if offset_pos < self.input.len() {
 
            Some(self.input[offset_pos])
 
        } else {
 
            None
 
        }
 
    }
 

	
 
    #[inline]
 
    pub fn section_at_pos(&self, start: InputPosition, end: InputPosition) -> &[u8] {
 
        &self.input[start.offset as usize..end.offset as usize]
 
    }
 

	
 
    #[inline]
 
    pub fn section_at_span(&self, span: InputSpan) -> &[u8] {
 
        &self.input[span.begin.offset as usize..span.end.offset as usize]
 
    }
 

	
 
    // Consumes the next character. Will check well-formedness of newlines: \r
 
    // must be followed by a \n, because this is used for error reporting. Will
 
    // not check for ascii-ness of the file, better left to a tokenizer.
 
    pub fn consume(&mut self) {
 
        match self.next() {
 
            Some(b'\r') => {
 
                if Some(b'\n') == self.lookahead(1) {
 
                    // Well formed file
 
                    self.offset += 1;
 
                } else {
 
                    // Not a well-formed file, pretend like we can continue
 
                    self.offset += 1;
 
                    self.set_error("Encountered carriage-feed without a following newline");
 
                }
 
            },
 
            Some(b'\n') => {
 
                self.line += 1;
 
                self.offset += 1;
 
            },
 
            Some(_) => {
 
                self.offset += 1;
 
            }
 
            None => {}
 
        }
 

	
 
        // Maybe we actually want to check this in release mode. Then again:
 
        // a 4 gigabyte source file... Really?
 
        debug_assert!(self.offset < u32::max_value() as usize);
 
    }
 

	
 
    fn set_error(&mut self, msg: &str) {
 
        if self.had_error.is_none() {
 
            self.had_error = Some(ParseError::new_error_str_at_pos(self, self.pos(), msg));
 
        }
 
    }
 

	
 
    fn get_lookup(&self) -> RwLockReadGuard<Vec<u32>> {
 
        // Once constructed the lookup always contains one element. We use this
 
        // to see if it is constructed already.
 
        {
 
            let lookup = self.offset_lookup.read().unwrap();
 
            if !lookup.is_empty() {
 
                return lookup;
 
            }
 
        }
 

	
 
        // Lookup was not constructed yet
 
        let mut lookup = self.offset_lookup.write().unwrap();
 
        if !lookup.is_empty() {
 
            // Somebody created it before we had the chance
 
            drop(lookup);
 
            let lookup = self.offset_lookup.read().unwrap();
 
            return lookup;
 
        }
 

	
 
        // Build the line number (!) to offset lookup, so offset by 1. We 
 
        // assume the entire source file is scanned (most common case) for
 
        // preallocation.
 
        lookup.reserve(self.line as usize + 2);
 
        lookup.push(0); // line 0: never used
 
        lookup.push(0); // first line: first character
 

	
 
        for char_idx in 0..self.input.len() {
 
            if self.input[char_idx] == b'\n' {
 
                lookup.push(char_idx as u32 + 1);
 
            }
 
        }
 

	
 
        lookup.push(self.input.len() as u32 + 1); // for lookup_line_end, intentionally adding one character
 
        debug_assert_eq!(self.line as usize + 2, lookup.len(), "remove me: i am a testing assert and sometimes invalid");
 

	
 
        // Return created lookup
 
        drop(lookup);
 
        let lookup = self.offset_lookup.read().unwrap();
 
        return lookup;
 
    }
 

	
 
    /// Retrieves offset at which line starts (right after newline)
 
    fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        lookup[line_number as usize]
 
    }
 

	
 
    /// Retrieves offset at which line ends (at the newline character or the
 
    /// preceding carriage feed for \r\n-encoded newlines)
 
    fn lookup_line_end_offset(&self, line_number: u32) -> u32 {
 
        let lookup = self.get_lookup();
 
        let offset = lookup[(line_number + 1) as usize] - 1;
 
        let offset_usize = offset as usize;
 

	
 
        // Compensate for newlines and a potential carriage feed. Note that the
 
        // end position is exclusive. So we only need to compensate for a
 
        // "\r\n"
 
        if offset_usize > 0 && offset_usize < self.input.len() && self.input[offset_usize] == b'\n' && self.input[offset_usize - 1] == b'\r' {
 
            offset - 1
 
        } else {
 
            offset
 
        }
 
    }
 
}
 

	
 
#[derive(Debug)]
 
pub enum StatementKind {
 
    Info,
 
    Error
 
}
 

	
 
#[derive(Debug)]
 
pub enum ContextKind {
 
    SingleLine,
 
    MultiLine,
 
}
 

	
 
#[derive(Debug)]
 
pub struct ErrorStatement {
 
    pub(crate) statement_kind: StatementKind,
 
    pub(crate) context_kind: ContextKind,
 
    pub(crate) start_line: u32,
 
    pub(crate) start_column: u32,
 
    pub(crate) end_line: u32,
 
    pub(crate) end_column: u32,
 
    pub(crate) filename: String,
 
    pub(crate) context: String,
 
    pub(crate) message: String,
 
}
 

	
 
impl ErrorStatement {
 
    fn from_source_at_pos(statement_kind: StatementKind, source: &InputSource, position: InputPosition, message: String) -> Self {
 
        // Seek line start and end
 
        let line_start = source.lookup_line_start_offset(position.line);
 
        let line_end = source.lookup_line_end_offset(position.line);
 
        let context = Self::create_context(source, line_start as usize, line_end as usize);
 
        debug_assert!(position.offset >= line_start);
 
        let column = position.offset - line_start + 1;
 

	
 
        Self{
 
            statement_kind,
 
            context_kind: ContextKind::SingleLine,
 
            start_line: position.line,
 
            start_column: column,
 
            end_line: position.line,
 
            end_column: column + 1,
 
            filename: source.filename.clone(),
 
            context,
 
            message,
 
        }
 
    }
 

	
 
    pub(crate) fn from_source_at_span(statement_kind: StatementKind, source: &InputSource, span: InputSpan, message: String) -> Self {
 
        debug_assert!(span.end.line >= span.begin.line);
 
        debug_assert!(span.end.offset >= span.begin.offset);
 

	
 
        let first_line_start = source.lookup_line_start_offset(span.begin.line);
 
        let last_line_start = source.lookup_line_start_offset(span.end.line);
 
        let last_line_end = source.lookup_line_end_offset(span.end.line);
 
        let context = Self::create_context(source, first_line_start as usize, last_line_end as usize);
 
        debug_assert!(span.begin.offset >= first_line_start);
 
        let start_column = span.begin.offset - first_line_start + 1;
 
        let end_column = span.end.offset - last_line_start + 1;
 

	
 
        let context_kind = if span.begin.line == span.end.line {
 
            ContextKind::SingleLine
 
        } else {
 
            ContextKind::MultiLine
 
        };
 

	
src/protocol/tests/eval_calls.rs
Show inline comments
 
use super::*;
 

	
 
#[test]
 
fn test_function_call() {
 
    Tester::new_single_source_expect_ok("with literal arg", "
 
    func add_two(u32 value) -> u32 {
 
        return value + 2;
 
    }
 
    func foo() -> u32 {
 
        return add_two(5);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(7)));
 
    });
 

	
 
    Tester::new_single_source_expect_ok("with variable arg", "
 
    func add_two(u32 value) -> u32 {
 
        value += 1;
 
        return value + 1;
 
    }
 
    func foo() -> bool {
 
        auto initial = 5;
 
        auto result = add_two(initial);
 
        return initial == 5 && result == 7;
 
    }").for_function("foo", |f| {
 
        f.call_ok(Some(Value::Bool(true)));
 
    });
 
}
 

	
 
#[test]
 
fn test_recursion() {
 
    // Single-chain
 
    Tester::new_single_source_expect_ok("factorial", "
 
    func horribly_slow_factorial(u32 term) -> u32 {
 
        if (term <= 0) { return 1; }
 

	
 
        return term * horribly_slow_factorial(term - 1);
 
    }
 
    func foo() -> u32 {
 
        return horribly_slow_factorial(10);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(3628800)));
 
    });
 

	
 
    // Multi-chain horribleness
 
    Tester::new_single_source_expect_ok("fibonacci", "
 
    func horribly_slow_fibo(u32 term) -> u32 {
 
        if (term <= 1) {
 
            return 1;
 
        }
 
        return horribly_slow_fibo(term - 2) + horribly_slow_fibo(term - 1);
 
    }
 
    func foo() -> u32 {
 
        return horribly_slow_fibo(10);
 
    }").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(89)));
 
    });
 

	
 
    // Mutual recursion (in a contrived fashion, ofcourse)
 
    Tester::new_single_source_expect_ok("mutual recursion", "
 
    func collatz_even(u32 iter, u32 value) -> u32 {
 
        value = value / 2;
 
        if (value % 2 == 0) return collatz_even(iter + 1, value);
 
        else                return collatz_odd(iter + 1, value);
 
    }
 
    func collatz_odd(u32 iter, u32 value) -> u32 {
 
        if (value <= 1) return iter;
 

	
 
        value = 3 * value + 1;
 
        if (value % 2 == 0) return collatz_even(iter + 1, value);
 
        else                return collatz_odd(iter + 1, value);
 
    }
 
    func foo() -> u32 {
 
        return collatz_odd(1, 19);
 
    }
 
    ").for_function("foo", |f| {
 
        f.call_ok(Some(Value::UInt32(21)));
 
    });
 
}
 

	
 
#[test]
 
fn test_empty_blocks() {
 
    // Yes this is silly, but I managed to make this a bug before
 
    Tester::new_single_source_expect_ok("traversing empty statements", "
 
    func foo() -> u32 {
 
        auto val = 128;
 
        if (true) {}
 
        while (false) {}
 
        return val;
 
    }
 
    ").for_function("foo", |f| { f.call_ok(Some(Value::UInt32(128))); });
 
}
 
\ No newline at end of file
src/protocol/tests/parser_literals.rs
Show inline comments
 
use super::*;
 

	
 
#[test]
 
fn test_binary_literals() {
 
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> u32 {
 
            u8  v1 = 0b0100_0010;
 
            u16 v2 = 0b10101010;
 
            u32 v3 = 0b10000001_01111110;
 
            u64 v4 = 0b1001_0110_1001_0110;
 

	
 
            return 0b10110;
 
        }
 
    ");
 

	
 
    Tester::new_single_source_expect_err("invalid character", "
 
        func test() -> u32 {
 
            return 0b10011001_10012001;
 
        }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    Tester::new_single_source_expect_err("no characters", "
 
        func test() -> u32 { return 0b; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    Tester::new_single_source_expect_err("only separators", "
 
        func test() -> u32 { return 0b____; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 
}
 

	
 
#[test]
 
fn test_string_literals() {
 
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> string {
 
            auto v1 = \"Hello, world!\";
 
            auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing
 
            auto v3 = \"\";
 
            return \"No way, dude!\";
 
        }
 
    ").for_function("test", |f| { f
 
        .for_variable("v1", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v2", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v3", |v| { v.assert_concrete_type("string"); });
 
    });
 

	
 
    Tester::new_single_source_expect_err("unterminated simple", "
 
        func test() -> string { return \"'; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    Tester::new_single_source_expect_err("unterminated with preceding escaped", "
 
        func test() -> string { return \"\\\"; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"\\")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    Tester::new_single_source_expect_err("invalid escaped character", "
 
        func test() -> string { return \"\\y\"; }
 
    ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); });
 

	
 
    // Note sure if this should always be in here...
 
    Tester::new_single_source_expect_err("non-ASCII string", "
 
        func test() -> string { return \"💧\"; }
 
    ").error(|e| { e.assert_msg_has(0, "non-ASCII character in string literal"); });
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)