Changeset - 533b69e54b4b
[Not reviewed]
0 1 0
MH - 4 years ago 2021-05-31 14:37:03
contact@maxhenger.nl
Fix to escaped character parsing
1 file changed with 4 insertions and 2 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/token_parsing.rs
Show inline comments
 
@@ -358,106 +358,108 @@ pub(crate) fn consume_character_literal(
 
        0 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 
        1 => {
 
            // We already know the text is ascii, so just throw an error if we have the escape
 
            // character.
 
            if char_text[0] == b'\\' {
 
                return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
 
            }
 
            return Ok((char_text[0] as char, span));
 
        },
 
        2 => {
 
            if char_text[0] == b'\\' {
 
                let result = parse_escaped_character(source, span, char_text[1])?;
 
                return Ok((result, span))
 
            }
 
        },
 
        _ => {}
 
    }
 

	
 
    return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
 
}
 

	
 
/// Consumes a string literal. We currently support a limited number of
 
/// backslash-escaped characters. Note that the result is stored in the
 
/// buffer.
 
pub(crate) fn consume_string_literal(
 
    source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 
) -> Result<InputSpan, ParseError> {
 
    if Some(TokenKind::String) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
 
    }
 

	
 
    buffer.clear();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let text = source.section_at_span(span);
 
    if !text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
 
    }
 

	
 
    debug_assert_eq!(text[0], b'"'); // here as kind of a reminder: the span includes the bounding quotation marks
 
    debug_assert_eq!(text[text.len() - 1], b'"');
 

	
 
    buffer.reserve(text.len() - 2);
 

	
 
    let mut was_escape = false;
 
    for idx in 1..text.len() - 1 {
 
        let cur = text[idx];
 
        if cur != b'\\' {
 
        let is_escape = cur == b'\\';
 
        if was_escape {
 
            let to_push = parse_escaped_character(source, span, cur)?;
 
            buffer.push(to_push);
 
        } else {
 
            buffer.push(cur as char);
 
        }
 

	
 
        if was_escape && is_escape {
 
            was_escape = false;
 
        } else {
 
            was_escape = true;
 
            was_escape = is_escape;
 
        }
 
    }
 

	
 
    debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal
 

	
 
    Ok(span)
 
}
 

	
 
fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result<char, ParseError> {
 
    let result = match v {
 
        b'r' => '\r',
 
        b'n' => '\n',
 
        b't' => '\t',
 
        b'0' => '\0',
 
        b'\\' => '\\',
 
        b'\'' => '\'',
 
        b'"' => '"',
 
        v => {
 
            let msg = if v.is_ascii_graphic() {
 
                format!("unsupported escape character '{}'", v as char)
 
            } else {
 
                format!("unsupported escape character with (unsigned) byte value {}", v)
 
            };
 
            return Err(ParseError::new_error_at_span(source, literal_span, msg))
 
        },
 
    };
 
    Ok(result)
 
}
 

	
 
pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    peek_ident(source, iter).map_or(false, |section| section == expected)
 
}
 

	
 
pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_positions();
 
        return Some(source.section_at_pos(start, end))
 
    }
 

	
0 comments (0 inline, 0 general)