From 3ebc282f2d0edce75a63d40b73fd818d267cc5c8 2021-05-31 11:13:54 From: mh Date: 2021-05-31 11:13:54 Subject: [PATCH] String literal testing, fix string escaping bug --- diff --git a/src/protocol/parser/pass_tokenizer.rs b/src/protocol/parser/pass_tokenizer.rs index a1e29aabfa77e1c29d0fd3b0d9711ba67a30c6ec..86649664d3562b13ba6bf7e31a3c513dd9bc386c 100644 --- a/src/protocol/parser/pass_tokenizer.rs +++ b/src/protocol/parser/pass_tokenizer.rs @@ -456,11 +456,18 @@ impl PassTokenizer { source.consume(); if c == b'"' && prev_char != b'\\' { + // Unescaped string terminator prev_char = c; break; } - prev_char = c; + if prev_char == b'\\' && c == b'\\' { + // Escaped backslash, set prev_char to bogus to not conflict + // with escaped-" and unterminated string literal detection. + prev_char = b'\0'; + } else { + prev_char = c; + } } if prev_char != b'"' { diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index 670cde33320a149ac89187fad3b5533e8cee9d5e..e0e90d8f1da7472dd824a7ef01c036c09481b827 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -366,7 +366,7 @@ pub(crate) fn consume_character_literal( }, 2 => { if char_text[0] == b'\\' { - let result = parse_escaped_character(source, iter.last_valid_pos(), char_text[1])?; + let result = parse_escaped_character(source, span, char_text[1])?; return Ok((result, span)) } }, @@ -401,7 +401,7 @@ pub(crate) fn consume_string_literal( let cur = text[idx]; if cur != b'\\' { if was_escape { - let to_push = parse_escaped_character(source, iter.last_valid_pos(), cur)?; + let to_push = parse_escaped_character(source, span, cur)?; buffer.push(to_push); } else { buffer.push(cur as char); @@ -417,7 +417,7 @@ pub(crate) fn consume_string_literal( Ok(span) } -fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> Result { +fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result { let result = match v { b'r' => '\r', b'n' => '\n', @@ -426,9 +426,14 @@ fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> R b'\\' => '\\', b'\'' => '\'', b'"' => '"', - v => return Err(ParseError::new_error_at_pos( - source, pos, format!("unexpected escaped character '{}'", v) - )), + v => { + let msg = if v.is_ascii_graphic() { + format!("unsupported escape character '{}'", v as char) + } else { + format!("unsupported escape character with (unsigned) byte value {}", v) + }; + return Err(ParseError::new_error_at_span(source, literal_span, msg)) + }, }; Ok(result) } diff --git a/src/protocol/tests/mod.rs b/src/protocol/tests/mod.rs index eba79350ebe381f68cf7d8127ffb5f10b29f9f3d..3578fece67601e6869de95537a1f340b6a977788 100644 --- a/src/protocol/tests/mod.rs +++ b/src/protocol/tests/mod.rs @@ -18,6 +18,7 @@ mod parser_inference; mod parser_monomorphs; mod parser_imports; mod parser_binding; +mod parser_literals; mod eval_operators; mod eval_calls; mod eval_casting; diff --git a/src/protocol/tests/parser_literals.rs b/src/protocol/tests/parser_literals.rs new file mode 100644 index 0000000000000000000000000000000000000000..cb21deeeac5996b32ef97fb50745c95619486f7c --- /dev/null +++ b/src/protocol/tests/parser_literals.rs @@ -0,0 +1,65 @@ +use super::*; + +#[test] +fn test_binary_literals() { + Tester::new_single_source_expect_ok("valid", " + func test() -> u32 { + u8 v1 = 0b0100_0010; + u16 v2 = 0b10101010; + u32 v3 = 0b10000001_01111110; + u64 v4 = 0b1001_0110_1001_0110; + + return 0b10110; + } + "); + + Tester::new_single_source_expect_err("invalid character", " + func test() -> u32 { + return 0b10011001_10012001; + } + ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); }); + + Tester::new_single_source_expect_err("no characters", " + func test() -> u32 { return 0b; } + ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); }); + + Tester::new_single_source_expect_err("only separators", " + func test() -> u32 { return 0b____; } + ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); }); +} + +#[test] +fn test_string_literals() { + Tester::new_single_source_expect_ok("valid", " + func test() -> string { + auto v1 = \"Hello, world!\"; + auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing + auto v3 = \"\"; + return \"No way, dude!\"; + } + ").for_function("test", |f| { f + .for_variable("v1", |v| { v.assert_concrete_type("string"); }) + .for_variable("v2", |v| { v.assert_concrete_type("string"); }) + .for_variable("v3", |v| { v.assert_concrete_type("string"); }); + }); + + Tester::new_single_source_expect_err("unterminated simple", " + func test() -> string { return \"'; } + ").error(|e| { e + .assert_num(1) + .assert_occurs_at(0, "\"") + .assert_msg_has(0, "unterminated"); + }); + + Tester::new_single_source_expect_err("unterminated with preceding escaped", " + func test() -> string { return \"\\\"; } + ").error(|e| { e + .assert_num(1) + .assert_occurs_at(0, "\"\\") + .assert_msg_has(0, "unterminated"); + }); + + Tester::new_single_source_expect_err("invalid escaped character", " + func test() -> string { return \"\\y\"; } + ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); }); +} \ No newline at end of file