Changeset - 2693293dc537
mh <contact@maxhenger.nl>, 2022-03-29 16:43:55
Remove code that produced token ranges
4 files changed with 27 insertions and 252 deletions:
src/protocol/parser/mod.rs
@@ -204,12 +204,6 @@ impl Parser {
             arch: &self.arch,
         };
 
-        if let Some(filename) = &self.write_tokens_to {
-            let mut writer = TokenWriter::new();
-            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
-            writer.write(&mut file, &self.modules);
-        }
-
         // Advance all modules to the phase where all symbols are scanned
         for module_idx in 0..self.modules.len() {
             self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
@@ -222,6 +216,12 @@ impl Parser {
             self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
         }
 
+        if let Some(filename) = &self.write_tokens_to {
+            let mut writer = TokenWriter::new();
+            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
+            writer.write(&mut file, &self.modules);
+        }
+
         // Add every known type to the type table
        self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?;
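Net effect of the two hunks above: the debug token dump moves from before symbol scanning to after definition parsing, so the `TokenWriter` output reflects the token markers after the parse passes have run over them. A condensed sketch of the resulting driver order (pass names and `write_tokens_to` are taken from this diff; the surrounding method is assumed):

    // Condensed sketch of the reordered driver; not the real method body.
    // 1. Run the symbol and definition passes over every module first.
    for module_idx in 0..self.modules.len() {
        self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
    }
    for module_idx in 0..self.modules.len() {
        self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
    }
    // 2. Only then dump the tokens, so the dump can show each marker's
    //    final state (e.g. its `handled` flag) rather than its initial one.
    if let Some(filename) = &self.write_tokens_to {
        let mut writer = TokenWriter::new();
        let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
        writer.write(&mut file, &self.modules);
    }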
src/protocol/parser/pass_tokenizer.rs
@@ -21,15 +21,12 @@ pub(crate) struct PassTokenizer {
     // unmatched opening braces, unmatched closing braces are detected
     // immediately.
     curly_stack: Vec<InputPosition>,
-    // Points to an element in the `TokenBuffer.ranges` variable.
-    stack_idx: usize,
 }
 
 impl PassTokenizer {
     pub(crate) fn new() -> Self {
         Self{
-            curly_stack: Vec::with_capacity(32),
-            stack_idx: 0
+            curly_stack: Vec::with_capacity(32)
         }
     }
@@ -37,23 +34,6 @@ impl PassTokenizer {
         // Assert source and buffer are at start
         debug_assert_eq!(source.pos().offset, 0);
         debug_assert!(target.tokens.is_empty());
-        debug_assert!(target.ranges.is_empty());
-
-        // Set up for tokenization by pushing the first range onto the stack.
-        // This range may get transformed into the appropriate range kind later,
-        // see `push_range` and `pop_range`.
-        self.stack_idx = 0;
-        target.ranges.push(TokenRange{
-            parent_idx: NO_RELATION,
-            range_kind: TokenRangeKind::Module,
-            curly_depth: 0,
-            start: 0,
-            end: 0,
-            num_child_ranges: 0,
-            first_child_idx: NO_RELATION,
-            last_child_idx: NO_RELATION,
-            next_sibling_idx: NO_RELATION,
-        });
-
         // Main tokenization loop
         while let Some(c) = source.next() {
@@ -68,10 +48,8 @@ impl PassTokenizer {
 
                 if demarks_symbol(ident) {
                     self.emit_marker(target, TokenMarkerKind::Definition, token_index);
-                    self.push_range(target, TokenRangeKind::Definition, token_index);
                 } else if demarks_import(ident) {
                     self.emit_marker(target, TokenMarkerKind::Import, token_index);
-                    self.push_range(target, TokenRangeKind::Import, token_index);
                 }
             } else if is_integer_literal_start(c) {
                 self.consume_number(source, target)?;
@@ -79,7 +57,6 @@ impl PassTokenizer {
                 let was_pragma = self.consume_pragma_or_pound(c, source, target)?;
                 if was_pragma {
                     self.emit_marker(target, TokenMarkerKind::Pragma, token_index);
-                    self.push_range(target, TokenRangeKind::Pragma, token_index);
                 }
             } else if self.is_line_comment_start(c, source) {
                 self.consume_line_comment(source, target)?;
@@ -87,10 +64,6 @@ impl PassTokenizer {
                 self.consume_block_comment(source, target)?;
             } else if is_whitespace(c) {
                 self.consume_whitespace(source);
-                let range = &target.ranges[self.stack_idx];
-                if range.range_kind == TokenRangeKind::Pragma {
-                    self.pop_range(target, target.tokens.len() as u32);
-                }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
                 if let Some((token, token_pos)) = was_punctuation {
@@ -106,20 +79,6 @@ impl PassTokenizer {
                         }
 
                         self.curly_stack.pop();
-
-                        let range = &target.ranges[self.stack_idx];
-                        if range.range_kind == TokenRangeKind::Definition && range.curly_depth == self.curly_stack.len() as u32 {
-                            self.pop_range(target, target.tokens.len() as u32);
-                        }
-
-                        // Exit early if we have more closing curly braces than
-                        // opening curly braces
-                    } else if token == TokenKind::SemiColon {
-                        // Check if this marks the end of an import
-                        let range = &target.ranges[self.stack_idx];
-                        if range.range_kind == TokenRangeKind::Import {
-                            self.pop_range(target, target.tokens.len() as u32);
-                        }
-                    }
                     }
                 } else {
                     return Err(ParseError::new_error_str_at_pos(
@@ -143,21 +102,6 @@ impl PassTokenizer {
             ));
         }
 
-        // Ranges that did not depend on curly braces may have missing tokens.
-        // So close all of the active tokens
-        while self.stack_idx != 0 {
-            self.pop_range(target, target.tokens.len() as u32);
-        }
-
-        // And finally, we may have a token range at the end that doesn't belong
-        // to a range yet, so insert a "code" range if this is the case.
-        debug_assert_eq!(self.stack_idx, 0);
-        let last_registered_idx = target.ranges[0].end;
-        let last_token_idx = target.tokens.len() as u32;
-        if last_registered_idx != last_token_idx {
-            self.add_code_range(target, 0, last_registered_idx, last_token_idx, NO_RELATION);
-        }
-
         Ok(())
     }
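With the hunks above applied, `tokenize` keeps no range state at all: the curly stack tracks brace depth, and the only record of interesting positions is the flat marker list. A minimal self-contained model of what survives (`handled` and the `emit_marker` parameter names appear verbatim in this changeset; the exact `TokenMarker` layout does not, so treat the fields as assumptions):

    // Minimal model of the surviving bookkeeping: a flat marker list
    // instead of a range tree. `kind`/`first_token` mirror emit_marker's
    // parameters; any further fields are assumptions.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum TokenMarkerKind { Pragma, Import, Definition }

    pub struct TokenMarker {
        pub kind: TokenMarkerKind,
        pub first_token: u32,
        pub handled: bool,
    }

    fn emit_marker(markers: &mut Vec<TokenMarker>, kind: TokenMarkerKind, first_token: u32) {
        // Where the old code also called push_range (and later pop_range on
        // the matching `}` or `;`), only the start point is recorded now.
        markers.push(TokenMarker{ kind, first_token, handled: false });
    }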
@@ -624,40 +568,6 @@ impl PassTokenizer {
         has_newline
     }
 
-    fn add_code_range(
-        &mut self, target: &mut TokenBuffer, parent_idx: i32,
-        code_start_idx: u32, code_end_idx: u32, next_sibling_idx: i32
-    ) {
-        let new_range_idx = target.ranges.len() as i32;
-        let parent_range = &mut target.ranges[parent_idx as usize];
-        debug_assert_ne!(parent_range.end, code_end_idx, "called push_code_range without a need to do so");
-
-        let sibling_idx = parent_range.last_child_idx;
-
-        parent_range.last_child_idx = new_range_idx;
-        parent_range.end = code_end_idx;
-        parent_range.num_child_ranges += 1;
-
-        let curly_depth = self.curly_stack.len() as u32;
-        target.ranges.push(TokenRange{
-            parent_idx,
-            range_kind: TokenRangeKind::Code,
-            curly_depth,
-            start: code_start_idx,
-            end: code_end_idx,
-            num_child_ranges: 0,
-            first_child_idx: NO_RELATION,
-            last_child_idx: NO_RELATION,
-            next_sibling_idx,
-        });
-
-        // Fix up the sibling indices
-        if sibling_idx != NO_RELATION {
-            let sibling_range = &mut target.ranges[sibling_idx as usize];
-            sibling_range.next_sibling_idx = new_range_idx;
-        }
-    }
-
     fn emit_marker(&mut self, target: &mut TokenBuffer, kind: TokenMarkerKind, first_token: u32) {
         debug_assert!(
             target.markers
@@ -674,65 +584,6 @@ impl PassTokenizer {
         });
     }
 
-    fn push_range(&mut self, target: &mut TokenBuffer, range_kind: TokenRangeKind, first_token_idx: u32) {
-        let new_range_idx = target.ranges.len() as i32;
-        let parent_idx = self.stack_idx as i32;
-        let parent_range = &mut target.ranges[self.stack_idx];
-
-        if parent_range.first_child_idx == NO_RELATION {
-            parent_range.first_child_idx = new_range_idx;
-        }
-
-        let last_registered_idx = parent_range.end;
-        if last_registered_idx != first_token_idx {
-            self.add_code_range(target, parent_idx, last_registered_idx, first_token_idx, new_range_idx + 1);
-        }
-
-        // Push the new range
-        self.stack_idx = target.ranges.len();
-        let curly_depth = self.curly_stack.len() as u32;
-        target.ranges.push(TokenRange{
-            parent_idx,
-            range_kind,
-            curly_depth,
-            start: first_token_idx,
-            end: first_token_idx, // modified when popped
-            num_child_ranges: 0,
-            first_child_idx: NO_RELATION,
-            last_child_idx: NO_RELATION,
-            next_sibling_idx: NO_RELATION
-        })
-    }
-
-    fn pop_range(&mut self, target: &mut TokenBuffer, end_token_idx: u32) {
-        let popped_idx = self.stack_idx as i32;
-        let popped_range = &mut target.ranges[self.stack_idx];
-        debug_assert!(self.stack_idx != 0, "attempting to pop top-level range");
-
-        // Fix up the current range before going back to parent
-        popped_range.end = end_token_idx;
-        debug_assert_ne!(popped_range.start, end_token_idx);
-
-        // Go back to parent and fix up its child pointers, but remember the
-        // last child, so we can link it to the newly popped range.
-        self.stack_idx = popped_range.parent_idx as usize;
-        let parent = &mut target.ranges[self.stack_idx];
-        if parent.first_child_idx == NO_RELATION {
-            parent.first_child_idx = popped_idx;
-        }
-        let prev_sibling_idx = parent.last_child_idx;
-        parent.last_child_idx = popped_idx;
-        parent.end = end_token_idx;
-        parent.num_child_ranges += 1;
-
-        // Fix up the sibling (if it exists)
-        if prev_sibling_idx != NO_RELATION {
-            let sibling = &mut target.ranges[prev_sibling_idx as usize];
-            sibling.next_sibling_idx = popped_idx;
-        }
-    }
-
     fn check_ascii(&self, source: &InputSource) -> Result<(), ParseError> {
         match source.next() {
            Some(c) if !c.is_ascii() => {
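For comparison with the old revision: the deleted `push_range`/`pop_range` pair maintained a stack discipline over `TokenBuffer.ranges`, with `stack_idx` always pointing at the innermost open range and every pop patching the parent's `end` so that parents enveloped their children. A toy reconstruction of just that invariant (sibling and child-count bookkeeping omitted; the deleted code used `i32` indices with `NO_RELATION == -1` rather than `Option<usize>`):

    // Toy model of the deleted range stack; assumes a root range was pushed
    // at index 0 before tokenization started, as the old tokenize() did.
    struct Range { parent: Option<usize>, start: u32, end: u32 }

    struct RangeStack { ranges: Vec<Range>, top: usize /* the old stack_idx */ }

    impl RangeStack {
        fn push(&mut self, start: u32) {
            let parent = Some(self.top);
            self.top = self.ranges.len();
            self.ranges.push(Range{ parent, start, end: start /* patched on pop */ });
        }

        fn pop(&mut self, end: u32) {
            assert_ne!(self.top, 0, "attempting to pop top-level range");
            self.ranges[self.top].end = end;
            self.top = self.ranges[self.top].parent.unwrap();
            self.ranges[self.top].end = end; // parents envelop their children
        }
    }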
src/protocol/parser/tokens.rs
@@ -193,42 +193,9 @@ pub struct TokenMarker {
     pub handled: bool,
 }
 
-/// The kind of token ranges that are specially parsed by the tokenizer.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum TokenRangeKind {
-    Module,
-    Pragma,
-    Import,
-    Definition,
-    Code,
-}
-
-pub const NO_RELATION: i32 = -1;
-pub const NO_SIBLING: i32 = NO_RELATION;
-
-/// A range of tokens with a specific meaning. Such a range is part of a tree
-/// where each parent tree envelops all of its children.
-#[derive(Debug)]
-pub struct TokenRange {
-    // Index of parent in `TokenBuffer.ranges`, does not have a parent if the
-    // range kind is Module, in that case the parent index is -1.
-    pub parent_idx: i32,
-    pub range_kind: TokenRangeKind,
-    pub curly_depth: u32,
-    // Offsets into `TokenBuffer.ranges`: the tokens belonging to this range.
-    pub start: u32,             // first token (inclusive index)
-    pub end: u32,               // last token (exclusive index)
-    // Child ranges
-    pub num_child_ranges: u32,  // Number of subranges
-    pub first_child_idx: i32,   // First subrange (or -1 if no subranges)
-    pub last_child_idx: i32,    // Last subrange (or -1 if no subranges)
-    pub next_sibling_idx: i32,  // Next subrange (or -1 if no next subrange)
-}
-
 pub struct TokenBuffer {
     pub tokens: Vec<Token>,
     pub markers: Vec<TokenMarker>,
-    pub ranges: Vec<TokenRange>,
 }
 
 impl TokenBuffer {
 
@@ -236,7 +203,6 @@ impl TokenBuffer {
         return Self{
             tokens: Vec::new(),
-            markers: Vec::new(),
-            ranges: Vec::new()
+            markers: Vec::new()
         };
     }
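With `TokenRangeKind` and `TokenRange` gone, `TokenBuffer` is a flat structure, and passes that used to walk the range tree can scan `markers` linearly instead. A hypothetical consumer, reusing the marker model sketched earlier; nothing in this diff shows the real consuming code:

    // Hypothetical: collect the first token index of every unhandled
    // import marker, replacing a walk over Import-kind token ranges.
    fn unhandled_imports(buffer: &TokenBuffer) -> Vec<u32> {
        buffer.markers.iter()
            .filter(|marker| marker.kind == TokenMarkerKind::Import && !marker.handled)
            .map(|marker| marker.first_token)
            .collect()
    }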
src/protocol/token_writer.rs
 #![allow(dead_code)]
 
-use std::fmt::Write;
+use std::fmt::{Write, Error as FmtError};
 use std::io::Write as IOWrite;
 
 use crate::protocol::input_source::{InputSource, InputSpan};
 use crate::protocol::parser::Module;
-use crate::protocol::tokens::{Token, TokenKind, TokenRange};
+use crate::protocol::tokens::{Token, TokenKind, TokenMarker};
 
 pub(crate) struct TokenWriter {
     buffer: String,
 
@@ -35,77 +35,33 @@ impl TokenWriter {
             None => self.buffer.push_str("Unnamed module\n"),
         }
 
-
-        let mut range_index = -1;
-        if !module.tokens.ranges.is_empty() {
-            range_index = 0;
-        }
-
-        while range_index >= 0 {
-            range_index = self.write_token_range(
-                &module.source, &module.tokens.tokens, &module.tokens.ranges, range_index, 1
-            );
-        }
+        self.write_marker_array(&module.tokens.markers, 1).expect("write markers");
+        self.write_token_array(&module.source, &module.tokens.tokens, 1).expect("write tokens");
     }
 
-    /// Writes a single token range. Recurses if there are any child ranges.
-    /// Returns the next token range index to iterate over (or a negative
-    /// number, if there are no more sibling ranges).
-    fn write_token_range(&mut self, source: &InputSource, tokens: &[Token], ranges: &[TokenRange], range_index: i32, indent: u32) -> i32 {
-        // Write range kind
-        let range = &ranges[range_index as usize];
-        self.write_dashed_indent(indent);
-        writeln!(self.buffer, "Range: {:?}", range.range_kind);
-
-        // Write tokens/lines it spans
-        let first_token_pos = tokens[range.start as usize].pos;
-
-        let last_token_pos = if (range.end as usize) < tokens.len() {
-            tokens[range.end as usize].pos
-        } else {
-            tokens.last().unwrap().pos
-        };
-        let first_source_col = source.get_column(first_token_pos);
-        let last_source_col = source.get_column(last_token_pos);
-
-        self.write_indent(indent);
-        writeln!(
-            self.buffer, "Source: token {} to {}, file {}:{}:{} to {}:{}",
-            range.start, range.end, source.filename,
-            first_token_pos.line, first_source_col,
-            last_token_pos.line, last_source_col
-        );
-
-        let next_sibling_index = range.next_sibling_idx;
-        if range.num_child_ranges == 0 {
-            // No child ranges, so dump the tokens here
-            debug_assert!(range.first_child_idx < 0);
-            self.write_token_array(source, tokens, range, indent);
-        } else {
-            // Child ranges
-            debug_assert!(range.first_child_idx >= 0);
+    fn write_marker_array(&mut self, markers: &[TokenMarker], indent: u32) -> Result<(), FmtError> {
         self.write_indent(indent);
-            writeln!(self.buffer, "Children: [");
+        writeln!(self.buffer, "Markers: [")?;
 
-            let mut range_index = range.first_child_idx;
-            while range_index >= 0 {
-                range_index = self.write_token_range(source, tokens, ranges, range_index, indent + 1);
+        let marker_indent = indent + 1;
+        for marker in markers {
+            self.write_indent(marker_indent);
+            writeln!(self.buffer, "{:?}", marker)?;
         }
 
         self.write_indent(indent);
-            writeln!(self.buffer, "]");
-        }
+        writeln!(self.buffer, "]")?;
 
-        // Wrote everything, return the next sibling token range
-        return next_sibling_index;
+        return Ok(());
     }
 
-    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], range: &TokenRange, indent: u32) {
+    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], indent: u32) -> Result<(), FmtError> {
         self.write_indent(indent);
-        writeln!(self.buffer, "Tokens: [");
+        writeln!(self.buffer, "Tokens: [")?;
 
+        let num_tokens = tokens.len();
         let token_indent = indent + 1;
-        for token_index in range.start as usize..range.end as usize {
+        for token_index in 0..num_tokens {
             // Skip uninteresting tokens
             let token = &tokens[token_index];
             if token.kind == TokenKind::SpanEnd {
@@ -113,19 +69,21 @@ impl TokenWriter {
             }
 
             self.write_indent(token_indent);
-            write!(self.buffer, "{:?} (index {})", token.kind, token_index);
+            write!(self.buffer, "{:?} (index {})", token.kind, token_index)?;
            if token.kind.has_span_end() {
                 let token_start = token.pos;
                 let token_end = tokens[token_index + 1].pos;
                 let section = source.section_at_span(InputSpan::from_positions(token_start, token_end));
-                writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section));
+                writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section))?;
             } else {
                 self.buffer.push('\n');
             }
         }
 
         self.write_indent(indent);
-        writeln!(self.buffer, "]");
+        writeln!(self.buffer, "]")?;
+
+        return Ok(());
     }
 
     fn write_dashed_indent(&mut self, indent: u32) {
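The reworked writer emits two flat sections per module, `Markers: [...]` followed by `Tokens: [...]`, instead of the old recursive `Range:`/`Children:` tree, and it now propagates `std::fmt::Error` via `?` where the old code discarded the `write!` results. A usage sketch (`TokenWriter::new` and `write` appear in this changeset; the module list and output path are placeholders):

    // Dump every parsed module to a file; per module the output is roughly:
    //   Markers: [ one Debug-formatted TokenMarker per line ]
    //   Tokens:  [ TokenKind (index n), plus source text for spanned tokens ]
    let mut writer = TokenWriter::new();
    let mut file = std::fs::File::create(std::path::Path::new("tokens.txt")).unwrap();
    writer.write(&mut file, &modules);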