diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index 47f02e7f6c2367f467a348b4363af2e28a590014..28663793ff335f24a9db786c9514ece91f92536b 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -86,6 +86,15 @@ pub(crate) const KW_TYPE_CHAR: &'static [u8] = KW_TYPE_CHAR_STR.as_bytes(); pub(crate) const KW_TYPE_STRING: &'static [u8] = KW_TYPE_STRING_STR.as_bytes(); pub(crate) const KW_TYPE_INFERRED: &'static [u8] = KW_TYPE_INFERRED_STR.as_bytes(); +// Builtin pragma types +// Not usable by the programmer, but usable in the standard library. These hint +// at the fact that we need a different system (e.g. function overloading) +pub(crate) const PRAGMA_TYPE_VOID: &'static [u8] = b"#type_void"; +pub(crate) const PRAGMA_TYPE_PORTLIKE: &'static [u8] = b"#type_portlike"; +pub(crate) const PRAGMA_TYPE_INTEGERLIKE: &'static [u8] = b"#type_integerlike"; +pub(crate) const PRAGMA_TYPE_ARRAYLIKE: &'static [u8] = b"#type_arraylike"; + + /// A special trait for when consuming comma-separated things such that we can /// push them onto a `Vec` and onto a `ScopedSection`. As we monomorph for /// very specific comma-separated cases I don't expect polymorph bloat. @@ -381,28 +390,57 @@ pub(crate) fn consume_character_literal( return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal")) } +/// Consumes a bytestring literal: a string interpreted as a byte array. See +/// `consume_string_literal` for further remarks. 
+pub(crate) fn consume_bytestring_literal( + source: &InputSource, iter: &mut TokenIter, buffer: &mut String +) -> Result<InputSpan, ParseError> { + // Retrieve string span, adjust to remove the leading "b" character + if Some(TokenKind::Bytestring) != iter.next() { + return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a bytestring literal")); + } + + let span = iter.next_span(); + iter.consume(); + debug_assert_eq!(source.section_at_pos(span.begin, span.begin.with_offset(1)), b"b"); + + // Parse into buffer + let text_span = InputSpan::from_positions(span.begin.with_offset(1), span.end); + parse_escaped_string(source, text_span, buffer)?; + + return Ok(span); +} + /// Consumes a string literal. We currently support a limited number of /// backslash-escaped characters. Note that the result is stored in the /// buffer. pub(crate) fn consume_string_literal( source: &InputSource, iter: &mut TokenIter, buffer: &mut String ) -> Result<InputSpan, ParseError> { + // Retrieve string span from token stream if Some(TokenKind::String) != iter.next() { return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal")); } - buffer.clear(); let span = iter.next_span(); iter.consume(); - let text = source.section_at_span(span); + // Parse into buffer + parse_escaped_string(source, span, buffer)?; + + return Ok(span); +} + +fn parse_escaped_string(source: &InputSource, text_span: InputSpan, buffer: &mut String) -> Result<(), ParseError> { + let text = source.section_at_span(text_span); if !text.is_ascii() { - return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal")); + return Err(ParseError::new_error_str_at_span(source, text_span, "expected an ASCII string literal")); } debug_assert_eq!(text[0], b'"'); // here as kind of a reminder: the span includes the bounding quotation marks debug_assert_eq!(text[text.len() - 1], b'"'); + buffer.clear(); buffer.reserve(text.len() - 2); let mut was_escape = false; @@ -410,9 +448,9 
@@ pub(crate) fn consume_string_literal( let cur = text[idx]; let is_escape = cur == b'\\'; if was_escape { - let to_push = parse_escaped_character(source, span, cur)?; + let to_push = parse_escaped_character(source, text_span, cur)?; buffer.push(to_push); - } else { + } else if !is_escape { buffer.push(cur as char); } @@ -425,9 +463,10 @@ pub(crate) fn consume_string_literal( debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal - Ok(span) + return Ok(()); } +#[inline] fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result<char, ParseError> { let result = match v { b'r' => '\r', @@ -449,13 +488,13 @@ fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) Ok(result) } -pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> { +pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputSpan), ParseError> { if Some(TokenKind::Pragma) != iter.next() { return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma")); } - let (pragma_start, pragma_end) = iter.next_positions(); + let pragma_span = iter.next_span(); iter.consume(); - Ok((source.section_at_pos(pragma_start, pragma_end), pragma_start, pragma_end)) + Ok((source.section_at_span(pragma_span), pragma_span)) } pub(crate) fn has_ident(source: &InputSource, iter: &mut TokenIter, expected: &[u8]) -> bool { @@ -540,7 +579,6 @@ fn is_reserved_expression_keyword(text: &[u8]) -> bool { match text { KW_LET | KW_CAST | KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL | - KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH | KW_FUNC_PRINT => true, _ => false, } } @@ -603,15 +641,16 @@ pub(crate) fn construct_symbol_conflict_error( format!("the type '{}' imported here", symbol.name.as_str()), Some(import.as_symbols().span) ); - } else { - // This 
is a defined symbol. So this must mean that the - // error was caused by it being defined. - debug_assert_eq!(definition.defined_in_module, module.root_id); - + } else if definition.defined_in_module == module.root_id { + // This is a symbol defined in the same module return ( format!("the type '{}' defined here", symbol.name.as_str()), Some(definition.identifier_span) ) + } else { + // Not imported, not defined in the module, so must be + // a global + return (format!("the global '{}'", symbol.name.as_str()), None) } } }