Changeset - 3ebc282f2d0e
[Not reviewed]
0 3 1
mh - 4 years ago 2021-05-31 11:13:54
contact@maxhenger.nl
String literal testing, fix string escaping bug
4 files changed with 84 insertions and 6 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/pass_tokenizer.rs
Show inline comments
 
@@ -267,390 +267,397 @@ impl PassTokenizer {
 
        } else if first_char == b'-' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'-') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusMinus;
 
            } else if Some(b'>') == next {
 
                source.consume();
 
                token_kind = TokenKind::ArrowRight;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::MinusEquals;
 
            } else {
 
                token_kind = TokenKind::Minus;
 
            }
 
        } else if first_char == b'.' {
 
            source.consume();
 
            if let Some(b'.') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::DotDot;
 
            } else {
 
                token_kind = TokenKind::Dot
 
            }
 
        } else if first_char == b'/' {
 
            source.consume();
 
            debug_assert_ne!(Some(b'/'), source.next());
 
            debug_assert_ne!(Some(b'*'), source.next());
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::SlashEquals;
 
            } else {
 
                token_kind = TokenKind::Slash;
 
            }
 
        } else if first_char == b':' {
 
            source.consume();
 
            if let Some(b':') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::ColonColon;
 
            } else {
 
                token_kind = TokenKind::Colon;
 
            }
 
        } else if first_char == b';' {
 
            source.consume();
 
            token_kind = TokenKind::SemiColon;
 
        } else if first_char == b'<' {
 
            source.consume();
 
            let next = source.next();
 
            if let Some(b'<') = next {
 
                source.consume();
 
                if let Some(b'=') = source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftLeftEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftLeft;
 
                }
 
            } else if let Some(b'=') = next {
 
                source.consume();
 
                token_kind = TokenKind::LessEquals;
 
            } else {
 
                token_kind = TokenKind::OpenAngle;
 
            }
 
        } else if first_char == b'=' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::EqualEqual;
 
            } else {
 
                token_kind = TokenKind::Equal;
 
            }
 
        } else if first_char == b'>' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'>') == next {
 
                source.consume();
 
                if Some(b'=') == source.next() {
 
                    source.consume();
 
                    token_kind = TokenKind::ShiftRightEquals;
 
                } else {
 
                    token_kind = TokenKind::ShiftRight;
 
                }
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::GreaterEquals;
 
            } else {
 
                token_kind = TokenKind::CloseAngle;
 
            }
 
        } else if first_char == b'?' {
 
            source.consume();
 
            token_kind = TokenKind::Question;
 
        } else if first_char == b'@' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::AtEquals;
 
            } else {
 
                token_kind = TokenKind::At;
 
            }
 
        } else if first_char == b'[' {
 
            source.consume();
 
            token_kind = TokenKind::OpenSquare;
 
        } else if first_char == b']' {
 
            source.consume();
 
            token_kind = TokenKind::CloseSquare;
 
        } else if first_char == b'^' {
 
            source.consume();
 
            if let Some(b'=') = source.next() {
 
                source.consume();
 
                token_kind = TokenKind::CaretEquals;
 
            } else {
 
                token_kind = TokenKind::Caret;
 
            }
 
        } else if first_char == b'{' {
 
            source.consume();
 
            token_kind = TokenKind::OpenCurly;
 
        } else if first_char == b'|' {
 
            source.consume();
 
            let next = source.next();
 
            if Some(b'|') == next {
 
                source.consume();
 
                token_kind = TokenKind::OrOr;
 
            } else if Some(b'=') == next {
 
                source.consume();
 
                token_kind = TokenKind::OrEquals;
 
            } else {
 
                token_kind = TokenKind::Or;
 
            }
 
        } else if first_char == b'}' {
 
            source.consume();
 
            token_kind = TokenKind::CloseCurly;
 
        } else if first_char == b'~' {
 
            source.consume();
 
            token_kind = TokenKind::Tilde;
 
        } else {
 
            self.check_ascii(source)?;
 
            return Ok(None);
 
        }
 

	
 
        target.tokens.push(Token::new(token_kind, pos));
 
        Ok(Some((token_kind, pos)))
 
    }
 

	
 
    fn consume_char_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading quote
 
        debug_assert!(source.next().unwrap() == b'\'');
 
        source.consume();
 

	
 
        let mut prev_char = b'\'';
 
        while let Some(c) = source.next() {
 
            if !c.is_ascii() {
 
                return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in char literal"));
 
            }
 
            source.consume();
 

	
 
            // Make sure ending quote was not escaped
 
            if c == b'\'' && prev_char != b'\\' {
 
                prev_char = c;
 
                break;
 
            }
 

	
 
            prev_char = c;
 
        }
 

	
 
        if prev_char != b'\'' {
 
            // Unterminated character literal, reached end of file.
 
            return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated character literal"));
 
        }
 

	
 
        let end_pos = source.pos();
 

	
 
        target.tokens.push(Token::new(TokenKind::Character, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_string_literal(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading double quotes
 
        debug_assert!(source.next().unwrap() == b'"');
 
        source.consume();
 

	
 
        let mut prev_char = b'"';
 
        while let Some(c) = source.next() {
 
            if !c.is_ascii() {
 
                return Err(ParseError::new_error_str_at_pos(source, source.pos(), "non-ASCII character in string literal"));
 
            }
 

	
 
            source.consume();
 
            if c == b'"' && prev_char != b'\\' {
 
                // Unescaped string terminator
 
                prev_char = c;
 
                break;
 
            }
 

	
 
            if prev_char == b'\\' && c == b'\\' {
 
                // Escaped backslash, set prev_char to bogus to not conflict
 
                // with escaped-" and unterminated string literal detection.
 
                prev_char = b'\0';
 
            } else {
 
                prev_char = c;
 
            }
 
        }
 

	
 
        if prev_char != b'"' {
 
            // Unterminated string literal
 
            return Err(ParseError::new_error_str_at_pos(source, begin_pos, "encountered unterminated string literal"));
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::String, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_pragma_or_pound(&mut self, first_char: u8, source: &mut InputSource, target: &mut TokenBuffer) -> Result<bool, ParseError> {
 
        let start_pos = source.pos();
 
        debug_assert_eq!(first_char, b'#');
 
        source.consume();
 

	
 
        let next = source.next();
 
        if next.is_none() || !is_identifier_start(next.unwrap()) {
 
            // Just a pound sign
 
            target.tokens.push(Token::new(TokenKind::Pound, start_pos));
 
            Ok(false)
 
        } else {
 
            // Pound sign followed by identifier
 
            source.consume();
 
            while let Some(c) = source.next() {
 
                if !is_identifier_remaining(c) {
 
                    break;
 
                }
 
                source.consume();
 
            }
 

	
 
            self.check_ascii(source)?;
 

	
 
            let end_pos = source.pos();
 
            target.tokens.push(Token::new(TokenKind::Pragma, start_pos));
 
            target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
            Ok(true)
 
        }
 
    }
 

	
 
    fn consume_line_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "//"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'/');
 
        source.consume();
 
        source.consume();
 

	
 
        let mut prev_char = b'/';
 
        let mut cur_char = b'/';
 
        while let Some(c) = source.next() {
 
            prev_char = cur_char;
 
            cur_char = c;
 

	
 
            if c == b'\n' {
 
                // End of line, note that the newline is not consumed
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 

	
 
        let mut end_pos = source.pos();
 
        debug_assert_eq!(begin_pos.line, end_pos.line);
 

	
 
        // Modify offset to not include the newline characters
 
        if cur_char == b'\n' {
 
            if prev_char == b'\r' {
 
                end_pos.offset -= 2;
 
            } else {
 
                end_pos.offset -= 1;
 
            }
 
            // Consume final newline
 
            source.consume();
 
        } else {
 
            // End of comment was due to EOF
 
            debug_assert!(source.next().is_none())
 
        }
 

	
 
        target.tokens.push(Token::new(TokenKind::LineComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_block_comment(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 

	
 
        // Consume the leading "/*"
 
        debug_assert!(source.next().unwrap() == b'/' && source.lookahead(1).unwrap() == b'*');
 
        source.consume();
 
        source.consume();
 

	
 
        // Explicitly do not put prev_char at "*", because then "/*/" would
 
        // represent a valid and closed block comment
 
        let mut prev_char = b' ';
 
        let mut is_closed = false;
 
        while let Some(c) = source.next() {
 
            source.consume();
 
            if prev_char == b'*' && c == b'/' {
 
                // End of block comment
 
                is_closed = true;
 
                break;
 
            }
 
            prev_char = c;
 
        }
 

	
 
        if !is_closed {
 
            return Err(ParseError::new_error_str_at_pos(
 
                source, source.pos(), "encountered unterminated block comment")
 
            );
 
        }
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::BlockComment, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    fn consume_identifier<'a>(&mut self, source: &'a mut InputSource, target: &mut TokenBuffer) -> Result<&'a [u8], ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_identifier_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until no more identifier
 
        while let Some(c) = source.next() {
 
            if !is_identifier_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Ident, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 
        Ok(source.section_at_pos(begin_pos, end_pos))
 
    }
 

	
 
    fn consume_number(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> {
 
        let begin_pos = source.pos();
 
        debug_assert!(is_integer_literal_start(source.next().unwrap()));
 
        source.consume();
 

	
 
        // Keep reading until it doesn't look like a number anymore
 
        while let Some(c) = source.next() {
 
            if !maybe_number_remaining(c) {
 
                break;
 
            }
 

	
 
            source.consume();
 
        }
 
        self.check_ascii(source)?;
 

	
 
        let end_pos = source.pos();
 
        target.tokens.push(Token::new(TokenKind::Integer, begin_pos));
 
        target.tokens.push(Token::new(TokenKind::SpanEnd, end_pos));
 

	
 
        Ok(())
 
    }
 

	
 
    // Consumes whitespace and returns whether or not the whitespace contained
 
    // a newline.
 
    fn consume_whitespace(&self, source: &mut InputSource) -> bool {
 
        debug_assert!(is_whitespace(source.next().unwrap()));
 

	
 
        let mut has_newline = false;
 
        while let Some(c) = source.next() {
 
            if !is_whitespace(c) {
 
                break;
 
            }
 

	
 
            if c == b'\n' {
 
                has_newline = true;
 
            }
 
            source.consume();
 
        }
 

	
 
        has_newline
 
    }
 

	
 
    fn add_code_range(
 
        &mut self, target: &mut TokenBuffer, parent_idx: i32,
 
        code_start_idx: u32, code_end_idx: u32, next_sibling_idx: i32
 
    ) {
 
        let new_range_idx = target.ranges.len() as i32;
 
        let parent_range = &mut target.ranges[parent_idx as usize];
 
        debug_assert_ne!(parent_range.end, code_end_idx, "called push_code_range without a need to do so");
src/protocol/parser/token_parsing.rs
Show inline comments
 
@@ -177,444 +177,449 @@ pub(crate) fn consume_comma_separated_until<T, F, E>(
 
        } else if !had_comma || next.is_none() {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        let new_item = consumer_fn(source, iter, ctx)?;
 
        target.push(new_item);
 

	
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(())
 
}
 

	
 
/// Consumes a comma-separated list of items if the opening delimiting token is
 
/// encountered. If not, then the iterator will remain at its current position.
 
/// Note that the potential cases may be:
 
/// - No opening delimiter encountered, then we return `false`.
 
/// - Both opening and closing delimiter encountered, but no items.
 
/// - Opening and closing delimiter encountered, and items were processed.
 
/// - Found an opening delimiter, but processing an item failed.
 
pub(crate) fn maybe_consume_comma_separated<T, F, E>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx,
 
    consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
 
    close_pos: Option<&mut InputPosition>
 
) -> Result<bool, ParseError>
 
    where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
 
          E: Extendable<Value=T>
 
{
 
    if Some(open_delim) != iter.next() {
 
        return Ok(false);
 
    }
 

	
 
    // Opening delimiter encountered, so must parse the comma-separated list.
 
    iter.consume();
 
    consume_comma_separated_until(close_delim, source, iter, ctx, consumer_fn, target, item_name_and_article, close_pos)?;
 

	
 
    Ok(true)
 
}
 

	
 
pub(crate) fn maybe_consume_comma_separated_spilled<F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<(), ParseError>>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
 
    iter: &mut TokenIter, ctx: &mut PassCtx,
 
    mut consumer_fn: F, item_name_and_article: &'static str
 
) -> Result<bool, ParseError> {
 
    let mut next = iter.next();
 
    if Some(open_delim) != next {
 
        return Ok(false);
 
    }
 

	
 
    iter.consume();
 
    let mut had_comma = true;
 
    loop {
 
        next = iter.next();
 
        if Some(close_delim) == next {
 
            iter.consume();
 
            break;
 
        } else if !had_comma {
 
            return Err(ParseError::new_error_at_pos(
 
                source, iter.last_valid_pos(),
 
                format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article)
 
            ));
 
        }
 

	
 
        consumer_fn(source, iter, ctx)?;
 
        next = iter.next();
 
        had_comma = next == Some(TokenKind::Comma);
 
        if had_comma {
 
            iter.consume();
 
        }
 
    }
 

	
 
    Ok(true)
 
}
 

	
 
/// Consumes a comma-separated list and expected the opening and closing
 
/// characters to be present. The returned array may still be empty
 
pub(crate) fn consume_comma_separated<T, F, E>(
 
    open_delim: TokenKind, close_delim: TokenKind, source: &InputSource,
 
    iter: &mut TokenIter, ctx: &mut PassCtx,
 
    consumer_fn: F, target: &mut E, item_name_and_article: &'static str,
 
    list_name_and_article: &'static str, close_pos: Option<&mut InputPosition>
 
) -> Result<(), ParseError>
 
    where F: FnMut(&InputSource, &mut TokenIter, &mut PassCtx) -> Result<T, ParseError>,
 
          E: Extendable<Value=T>
 
{
 
    let first_pos = iter.last_valid_pos();
 
    match maybe_consume_comma_separated(
 
        open_delim, close_delim, source, iter, ctx, consumer_fn, target,
 
        item_name_and_article, close_pos
 
    ) {
 
        Ok(true) => Ok(()),
 
        Ok(false) => {
 
            return Err(ParseError::new_error_at_pos(
 
                source, first_pos,
 
                format!("expected {}", list_name_and_article)
 
            ));
 
        },
 
        Err(err) => Err(err)
 
    }
 
}
 

	
 
/// Consumes an integer literal, may be binary, octal, hexadecimal or decimal,
 
/// and may have separating '_'-characters.
 
/// TODO: @Cleanup, @Performance
 
pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(u64, InputSpan), ParseError> {
 
    if Some(TokenKind::Integer) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an integer literal"));
 
    }
 
    let integer_span = iter.next_span();
 
    iter.consume();
 

	
 
    let integer_text = source.section_at_span(integer_span);
 

	
 
    // Determine radix and offset from prefix
 
    let (radix, input_offset, radix_name) =
 
        if integer_text.starts_with(b"0b") || integer_text.starts_with(b"0B") {
 
            // Binary number
 
            (2, 2, "binary")
 
        } else if integer_text.starts_with(b"0o") || integer_text.starts_with(b"0O") {
 
            // Octal number
 
            (8, 2, "octal")
 
        } else if integer_text.starts_with(b"0x") || integer_text.starts_with(b"0X") {
 
            // Hexadecimal number
 
            (16, 2, "hexadecimal")
 
        } else {
 
            (10, 0, "decimal")
 
        };
 

	
 
    // Take out any of the separating '_' characters
 
    buffer.clear();
 
    for char_idx in input_offset..integer_text.len() {
 
        let char = integer_text[char_idx];
 
        if char == b'_' {
 
            continue;
 
        }
 

	
 
        if !((char >= b'0' && char <= b'9') || (char >= b'A' && char <= b'F') || (char >= b'a' || char <= b'f')) {
 
            return Err(ParseError::new_error_at_span(
 
                source, integer_span,
 
                format!("incorrectly formatted {} number", radix_name)
 
            ));
 
        }
 
        buffer.push(char::from(char));
 
    }
 

	
 
    // Use the cleaned up string to convert to integer
 
    match u64::from_str_radix(&buffer, radix) {
 
        Ok(number) => Ok((number, integer_span)),
 
        Err(_) => Err(ParseError::new_error_at_span(
 
            source, integer_span,
 
            format!("incorrectly formatted {} number", radix_name)
 
        )),
 
    }
 
}
 

	
 
/// Consumes a character literal. We currently support a limited number of
 
/// backslash-escaped characters
 
pub(crate) fn consume_character_literal(
 
    source: &InputSource, iter: &mut TokenIter
 
) -> Result<(char, InputSpan), ParseError> {
 
    if Some(TokenKind::Character) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal"));
 
    }
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let char_text = source.section_at_span(span);
 
    if !char_text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(
 
            source, span, "expected an ASCII character literal"
 
        ));
 
    }
 

	
 
    match char_text.len() {
 
        0 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 
        1 => {
 
            // We already know the text is ascii, so just throw an error if we have the escape
 
            // character.
 
            if char_text[0] == b'\\' {
 
                return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
 
            }
 
            return Ok((char_text[0] as char, span));
 
        },
 
        2 => {
 
            if char_text[0] == b'\\' {
 
                let result = parse_escaped_character(source, iter.last_valid_pos(), char_text[1])?;
 
                let result = parse_escaped_character(source, span, char_text[1])?;
 
                return Ok((result, span))
 
            }
 
        },
 
        _ => {}
 
    }
 

	
 
    return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
 
}
 

	
 
/// Consumes a string literal. We currently support a limited number of
 
/// backslash-escaped characters. Note that the result is stored in the
 
/// buffer.
 
pub(crate) fn consume_string_literal(
 
    source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 
) -> Result<InputSpan, ParseError> {
 
    if Some(TokenKind::String) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
 
    }
 

	
 
    buffer.clear();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let text = source.section_at_span(span);
 
    if !text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
 
    }
 
    buffer.reserve(text.len());
 

	
 
    let mut was_escape = false;
 
    for idx in 0..text.len() {
 
        let cur = text[idx];
 
        if cur != b'\\' {
 
            if was_escape {
 
                let to_push = parse_escaped_character(source, iter.last_valid_pos(), cur)?;
 
                let to_push = parse_escaped_character(source, span, cur)?;
 
                buffer.push(to_push);
 
            } else {
 
                buffer.push(cur as char);
 
            }
 
            was_escape = false;
 
        } else {
 
            was_escape = true;
 
        }
 
    }
 

	
 
    debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal
 

	
 
    Ok(span)
 
}
 

	
 
fn parse_escaped_character(source: &InputSource, pos: InputPosition, v: u8) -> Result<char, ParseError> {
 
fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result<char, ParseError> {
 
    let result = match v {
 
        b'r' => '\r',
 
        b'n' => '\n',
 
        b't' => '\t',
 
        b'0' => '\0',
 
        b'\\' => '\\',
 
        b'\'' => '\'',
 
        b'"' => '"',
 
        v => return Err(ParseError::new_error_at_pos(
 
            source, pos, format!("unexpected escaped character '{}'", v)
 
        )),
 
        v => {
 
            let msg = if v.is_ascii_graphic() {
 
                format!("unsupported escape character '{}'", v as char)
 
            } else {
 
                format!("unsupported escape character with (unsigned) byte value {}", v)
 
            };
 
            return Err(ParseError::new_error_at_span(source, literal_span, msg))
 
        },
 
    };
 
    Ok(result)
 
}
 

	
 
pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    peek_ident(source, iter).map_or(false, |section| section == expected)
 
}
 

	
 
pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_positions();
 
        return Some(source.section_at_pos(start, end))
 
    }
 

	
 
    None
 
}
 

	
 
/// Consumes any identifier and returns it together with its span. Does not
 
/// check if the identifier is a reserved keyword.
 
pub(crate) fn consume_any_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    if Some(TokenKind::Ident) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected an identifier"));
 
    }
 
    let (ident_start, ident_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(ident_start, ident_end), InputSpan::from_positions(ident_start, ident_end)))
 
}
 

	
 
/// Consumes a specific identifier. May or may not be a reserved keyword.
 
pub(crate) fn consume_exact_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> Result<InputSpan, ParseError> {
 
    let (ident, pos) = consume_any_ident(source, iter)?;
 
    if ident != expected {
 
        debug_assert!(expected.is_ascii());
 
        return Err(ParseError::new_error_at_pos(
 
            source, iter.last_valid_pos(),
 
            format!("expected the text '{}'", &String::from_utf8_lossy(expected))
 
        ));
 
    }
 
    Ok(pos)
 
}
 

	
 
/// Consumes an identifier that is not a reserved keyword and returns it
 
/// together with its span.
 
pub(crate) fn consume_ident<'a>(
 
    source: &'a InputSource, iter: &mut TokenIter
 
) -> Result<(&'a [u8], InputSpan), ParseError> {
 
    let (ident, span) = consume_any_ident(source, iter)?;
 
    if is_reserved_keyword(ident) {
 
        return Err(ParseError::new_error_str_at_span(source, span, "encountered reserved keyword"));
 
    }
 

	
 
    Ok((ident, span))
 
}
 

	
 
/// Consumes an identifier and immediately intern it into the `StringPool`
 
pub(crate) fn consume_ident_interned(
 
    source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx
 
) -> Result<Identifier, ParseError> {
 
    let (value, span) = consume_ident(source, iter)?;
 
    let value = ctx.pool.intern(value);
 
    Ok(Identifier{ span, value })
 
}
 

	
 
fn is_reserved_definition_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_STRUCT | KW_ENUM | KW_UNION | KW_FUNCTION | KW_PRIMITIVE | KW_COMPOSITE => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_statement_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_IMPORT | KW_AS |
 
        KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
 
        KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
 
        KW_STMT_SYNC | KW_STMT_NEW => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_expression_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_LET | KW_CAST |
 
        KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
 
        KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_type_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
 
        KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
 
        KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
 
        KW_TYPE_CHAR | KW_TYPE_STRING |
 
        KW_TYPE_INFERRED => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_keyword(text: &[u8]) -> bool {
 
    return
 
        is_reserved_definition_keyword(text) ||
 
        is_reserved_statement_keyword(text) ||
 
        is_reserved_expression_keyword(text) ||
 
        is_reserved_type_keyword(text);
 
}
 

	
 
pub(crate) fn seek_module(modules: &[Module], root_id: RootId) -> Option<&Module> {
 
    for module in modules {
 
        if module.root_id == root_id {
 
            return Some(module)
 
        }
 
    }
 

	
 
    return None
 
}
 

	
 
/// Constructs a human-readable message indicating why there is a conflict of
 
/// symbols.
 
// Note: passing the `module_idx` is not strictly necessary, but will prevent
 
// programmer mistakes during development: we get a conflict because we're
 
// currently parsing a particular module.
 
pub(crate) fn construct_symbol_conflict_error(
 
    modules: &[Module], module_idx: usize, ctx: &PassCtx, new_symbol: &Symbol, old_symbol: &Symbol
 
) -> ParseError {
 
    let module = &modules[module_idx];
 
    let get_symbol_span_and_msg = |symbol: &Symbol| -> (String, Option<InputSpan>) {
 
        match &symbol.variant {
 
            SymbolVariant::Module(module) => {
 
                let import = &ctx.heap[module.introduced_at];
 
                return (
 
                    format!("the module aliased as '{}' imported here", symbol.name.as_str()),
 
                    Some(import.as_module().span)
 
                );
 
            },
 
            SymbolVariant::Definition(definition) => {
 
                if definition.defined_in_module.is_invalid() {
 
                    // Must be a builtin thing
 
                    return (format!("the builtin '{}'", symbol.name.as_str()), None)
 
                } else {
 
                    if let Some(import_id) = definition.imported_at {
 
                        let import = &ctx.heap[import_id];
 
                        return (
 
                            format!("the type '{}' imported here", symbol.name.as_str()),
 
                            Some(import.as_symbols().span)
 
                        );
 
                    } else {
 
                        // This is a defined symbol. So this must mean that the
 
                        // error was caused by it being defined.
 
                        debug_assert_eq!(definition.defined_in_module, module.root_id);
 

	
 
                        return (
 
                            format!("the type '{}' defined here", symbol.name.as_str()),
 
                            Some(definition.identifier_span)
 
                        )
 
                    }
 
                }
 
            }
 
        }
 
    };
 

	
 
    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
 
    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
 
    let new_symbol_span = new_symbol_span.unwrap(); // because new symbols cannot be builtin
 

	
 
    match old_symbol_span {
 
        Some(old_symbol_span) => ParseError::new_error_at_span(
 
            &module.source, new_symbol_span, format!("symbol is defined twice: {}", new_symbol_msg)
 
        ).with_info_at_span(
 
            &module.source, old_symbol_span, format!("it conflicts with {}", old_symbol_msg)
 
        ),
 
        None => ParseError::new_error_at_span(
 
            &module.source, new_symbol_span,
 
            format!("symbol is defined twice: {} conflicts with {}", new_symbol_msg, old_symbol_msg)
 
        )
 
    }
 
}
 
\ No newline at end of file
src/protocol/tests/mod.rs
Show inline comments
 
/**
 
 * protocol/tests.rs
 
 *
 
 * Contains tests for various parts of the lexer/parser and the evaluator of the
 
 * code. These are intended to be temporary tests such that we're sure that we
 
 * don't break existing functionality.
 
 *
 
 * In the future these should be replaced by proper testing protocols.
 
 *
 
 * If any of these tests fail, and you think they're not needed anymore, feel
 
 * free to cast them out into oblivion, where dead code goes to die.
 
 */
 

	
 
mod utils;
 
mod lexer;
 
mod parser_validation;
 
mod parser_inference;
 
mod parser_monomorphs;
 
mod parser_imports;
 
mod parser_binding;
 
mod parser_literals;
 
mod eval_operators;
 
mod eval_calls;
 
mod eval_casting;
 
mod eval_binding;
 
mod eval_silly;
 

	
 
pub(crate) use utils::{Tester}; // the testing harness
 
pub(crate) use crate::protocol::eval::value::*; // to test functions
 
\ No newline at end of file
src/protocol/tests/parser_literals.rs
Show inline comments
 
new file 100644
 
use super::*;
 

	
 
#[test]
 
fn test_binary_literals() {
 
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> u32 {
 
            u8  v1 = 0b0100_0010;
 
            u16 v2 = 0b10101010;
 
            u32 v3 = 0b10000001_01111110;
 
            u64 v4 = 0b1001_0110_1001_0110;
 

	
 
            return 0b10110;
 
        }
 
    ");
 

	
 
    Tester::new_single_source_expect_err("invalid character", "
 
        func test() -> u32 {
 
            return 0b10011001_10012001;
 
        }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    Tester::new_single_source_expect_err("no characters", "
 
        func test() -> u32 { return 0b; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 

	
 
    Tester::new_single_source_expect_err("only separators", "
 
        func test() -> u32 { return 0b____; }
 
    ").error(|e| { e.assert_msg_has(0, "incorrectly formatted binary number"); });
 
}
 

	
 
#[test]
 
fn test_string_literals() {
 
    Tester::new_single_source_expect_ok("valid", "
 
        func test() -> string {
 
            auto v1 = \"Hello, world!\";
 
            auto v2 = \"\\t\\r\\n\\\\\"; // why hello there, confusing thing
 
            auto v3 = \"\";
 
            return \"No way, dude!\";
 
        }
 
    ").for_function("test", |f| { f
 
        .for_variable("v1", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v2", |v| { v.assert_concrete_type("string"); })
 
        .for_variable("v3", |v| { v.assert_concrete_type("string"); });
 
    });
 

	
 
    Tester::new_single_source_expect_err("unterminated simple", "
 
        func test() -> string { return \"'; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    Tester::new_single_source_expect_err("unterminated with preceding escaped", "
 
        func test() -> string { return \"\\\"; }
 
    ").error(|e| { e
 
        .assert_num(1)
 
        .assert_occurs_at(0, "\"\\")
 
        .assert_msg_has(0, "unterminated");
 
    });
 

	
 
    Tester::new_single_source_expect_err("invalid escaped character", "
 
        func test() -> string { return \"\\y\"; }
 
    ").error(|e| { e.assert_msg_has(0, "unsupported escape character 'y'"); });
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)