diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index 0142174de5614bf9ea6e795127785fac1f2fece8..28663793ff335f24a9db786c9514ece91f92536b 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -390,28 +390,57 @@ pub(crate) fn consume_character_literal( return Err(ParseError::new_error_str_at_span(source, span, "too many characters in character literal")) } +/// Consumes a bytestring literal: a string interpreted as a byte array. See +/// `consume_string_literal` for further remarks. +pub(crate) fn consume_bytestring_literal( + source: &InputSource, iter: &mut TokenIter, buffer: &mut String +) -> Result<InputSpan, ParseError> { + // Retrieve string span, adjust to remove the leading "b" character + if Some(TokenKind::Bytestring) != iter.next() { + return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a bytestring literal")); + } + + let span = iter.next_span(); + iter.consume(); + debug_assert_eq!(source.section_at_pos(span.begin, span.begin.with_offset(1)), b"b"); + + // Parse into buffer + let text_span = InputSpan::from_positions(span.begin.with_offset(1), span.end); + parse_escaped_string(source, text_span, buffer)?; + + return Ok(span); +} + /// Consumes a string literal. We currently support a limited number of /// backslash-escaped characters. Note that the result is stored in the /// buffer. 
pub(crate) fn consume_string_literal( source: &InputSource, iter: &mut TokenIter, buffer: &mut String ) -> Result<InputSpan, ParseError> { + // Retrieve string span from token stream if Some(TokenKind::String) != iter.next() { return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a string literal")); } - buffer.clear(); let span = iter.next_span(); iter.consume(); - let text = source.section_at_span(span); + // Parse into buffer + parse_escaped_string(source, span, buffer)?; + + return Ok(span); +} + +fn parse_escaped_string(source: &InputSource, text_span: InputSpan, buffer: &mut String) -> Result<(), ParseError> { + let text = source.section_at_span(text_span); if !text.is_ascii() { - return Err(ParseError::new_error_str_at_span(source, span, "expected an ASCII string literal")); + return Err(ParseError::new_error_str_at_span(source, text_span, "expected an ASCII string literal")); } debug_assert_eq!(text[0], b'"'); // here as kind of a reminder: the span includes the bounding quotation marks debug_assert_eq!(text[text.len() - 1], b'"'); + buffer.clear(); buffer.reserve(text.len() - 2); let mut was_escape = false; @@ -419,9 +448,9 @@ pub(crate) fn consume_string_literal( let cur = text[idx]; let is_escape = cur == b'\\'; if was_escape { - let to_push = parse_escaped_character(source, span, cur)?; + let to_push = parse_escaped_character(source, text_span, cur)?; buffer.push(to_push); - } else { + } else if !is_escape { buffer.push(cur as char); } @@ -434,9 +463,10 @@ pub(crate) fn consume_string_literal( debug_assert!(!was_escape); // because otherwise we couldn't have ended the string literal - Ok(span) + return Ok(()); } +#[inline] fn parse_escaped_character(source: &InputSource, literal_span: InputSpan, v: u8) -> Result<char, ParseError> { let result = match v { b'r' => '\r',