Changeset - a52ba09256ad
[Not reviewed]
0 2 0
MH - 4 years ago 2021-04-23 10:26:53
henger@cwi.nl
WIP on compiler rearchitecting
2 files changed with 102 insertions and 8 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/pass_definitions.rs
Show inline comments
 
@@ -692,51 +692,68 @@ impl PassDefinitions {
 
            let mut expressions = Vec::new();
 
            consume_comma_separated(
 
                TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter,
 
                |source, iter| self.consume_expression(module, iter, ctx),
 
                &mut expressions, "an expression", "a list of expressions", Some(&mut end_pos)
 
            )?;
 

	
 
            // TODO: Turn into literal
 
            result = ctx.heap.alloc_array_expression(|this| ArrayExpression{
 
                this,
 
                span: InputSpan::from_positions(start_pos, end_pos),
 
                elements: expressions,
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Integer) {
 
            let (literal, span) = consume_integer_literal(&module.source, iter, &mut self.buffer)?;
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::Integer(LiteralInteger{ unsigned_value: literal, negated: false }),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::String) {
 
            let (text, span) = consume_string_literal(&module.source, iter, &mut self.buffer)?;
 
            let span = consume_string_literal(&module.source, iter, &mut self.buffer)?;
 
            let interned = ctx.pool.intern(self.buffer.as_bytes());
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::String(interned),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Character) {
 

	
 
            let (character, span) = consume_character_literal(&module.source, iter)?;
 
            result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{
 
                this, span,
 
                value: Literal::Character(character),
 
                parent: ExpressionParent::None,
 
                concrete_type: ConcreteType::default(),
 
            }).upcast();
 
        } else if next == Some(TokenKind::Ident) {
 
            // May be a variable, a type instantiation or a function call. If we
 
            // have a single identifier that we cannot find in the type table
 
            // then we're going to assume that we're dealign with a variable.
 
        }
 

	
 
        Ok(result)
 
    }
 

	
 
    //--------------------------------------------------------------------------
 
    // Expression Utilities
 
    //--------------------------------------------------------------------------
 

	
 
    #[inline]
 
    fn consume_generic_binary_expression<
 
        M: Fn(Option<TokenKind>) -> Option<BinaryOperator>,
 
        F: Fn(&mut PassDefinitions, &Module, &mut TokenIter, &mut PassCtx) -> Result<ExpressionId, ParseError>
 
    >(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, match_fn: M, higher_precedence_fn: F
 
    ) -> Result<ExpressionId, ParseError> {
 
        let mut result = higher_precedence_fn(self, module, iter, ctx)?;
 
        while let Some(operation) = match_fn(iter.next()) {
 
            let span = iter.next_span();
 
            iter.consume();
 

	
 
            let left = result;
 
            let right = higher_precedence_fn(self, module, iter, ctx)?;
 

	
src/protocol/parser/token_parsing.rs
Show inline comments
 
@@ -250,63 +250,140 @@ pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter
 
        if char == b'_' {
 
            continue;
 
        }
 
        if !char.is_ascii_digit() {
 
            return Err(ParseError::new_error_at_span(
 
                source, integer_span,
 
                format!("incorrectly formatted {} number", radix_name)
 
            ));
 
        }
 
        buffer.push(char::from(char));
 
    }
 

	
 
    // Use the cleaned up string to convert to integer
 
    match u64::from_str_radix(&buffer, radix) {
 
        Ok(number) => Ok((number, integer_span)),
 
        Err(_) => Err(ParseError::new_error_at_span(
 
            source, integer_span,
 
            format!("incorrectly formatted {} number", radix_name)
 
        )),
 
    }
 
}
 

	
 
/// Consumes a character literal. We currently support a limited number of
 
/// backslash-escaped characters
 
pub(crate) fn consume_character_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<char, ParseError> {
 
pub(crate) fn consume_character_literal(
 
    source: &InputSource, iter: &mut TokenIter
 
) -> Result<(char, InputSpan), ParseError> {
 
    if Some(TokenKind::Character) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal"));
 
    }
 
    let char_span = iter.next_span();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let char_text = source.section_at_span(char_span);
 
    let char_text = source.section_at_span(span);
 
    if !char_text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(
 
            source, span, "expected an ASCII character literal"
 
        ));
 
    }
 

	
 
    match char_text.len() {
 
        0 => return Err(ParseError::new_error_str_at_span(source, span, "too little characters in character literal")),
 
        1 => {
 
            // We already know the text is ascii, so just throw an error if we have the escape
 
            // character.
 
            if char_text[0] == b'\\' {
 
                return Err(ParseError::new_error_str_at_span(source, span, "escape character without subsequent character"));
 
            }
 
            return Ok((char_text[0] as char, span));
 
        },
 
        2 => {
 
            if char_text[0] == b'\\' {
 
                let result = parse_escaped_character(char_text[1])?;
 
                return Ok((result, span))
 
            }
 
        },
 
        _ => {}
 
    }
 

	
 
    //
 
    return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal"))
 
}
 

	
 
/// Consumes a string literal. We currently support a limited number of
 
/// backslash-escaped characters.
 
pub(crate) fn consume_string_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(>
 
/// backslash-escaped characters. Note that the result is stored in the
 
/// buffer.
 
pub(crate) fn consume_string_literal(
 
    source: &InputSource, iter: &mut TokenIter, buffer: &mut String
 
) -> Result<InputSpan, ParseError> {
 
    if Some(TokenKind::String) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal"));
 
    }
 

	
 
    buffer.clear();
 
    let span = iter.next_span();
 
    iter.consume();
 

	
 
    let text = source.section_at_span(span);
 
    if !text.is_ascii() {
 
        return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal"));
 
    }
 
    buffer.reserve(text.len());
 

	
 
    let mut was_escape = false;
 
    for idx in 0..text.len() {
 
        let cur = text[idx];
 
        if cur != b'\\' {
 
            if was_escape {
 
                let to_push = parse_escaped_character(cur)?;
 
                buffer.push(to_push);
 
            } else {
 
                buffer.push(cur as char);
 
            }
 
            was_escape = false;
 
        } else {
 
            was_escape = true;
 
        }
 
    }
 

	
 
    Ok(span)
 
}
 

	
 
fn parse_escaped_character(v: u8) -> Result<char, ParseError> {
 
    let result = match v {
 
        b'r' => '\r',
 
        b'n' => '\n',
 
        b't' => '\t',
 
        b'0' => '\0',
 
        b'\\' => '\\',
 
        b'\'' => '\'',
 
        b'"' => '"',
 
        v => return Err(ParseError::new_error_at_span(
 
            source, span, format!("unexpected escaped character '{}'", v)
 
        )),
 
    };
 
    Ok(result)
 
}
 

	
 
pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> {
 
    if Some(TokenKind::Pragma) != iter.next() {
 
        return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma"));
 
    }
 
    let (pragma_start, pragma_end) = iter.next_positions();
 
    iter.consume();
 
    Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end))
 
}
 

	
 
pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool {
 
    peek_ident(source, iter).map_or(false, |section| section == expected)
 
}
 

	
 
pub(crate) fn peek_ident<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Option<&'a [u8]> {
 
    if Some(TokenKind::Ident) == iter.next() {
 
        let (start, end) = iter.next_positions();
 
        return Some(source.section_at_pos(start, end))
 
    }
 

	
 
    None
 
}
 

	
 
/// Consumes any identifier and returns it together with its span. Does not
0 comments (0 inline, 0 general)