Changeset 6d6c5b5f07ae
[Not reviewed]
MH <contact@maxhenger.nl> - 2022-03-28 22:18:20
Attempting to fix token tree construction
7 files changed with 29 insertions and 519 deletions:
src/protocol/parser/pass_definitions.rs
@@ -84,13 +84,13 @@ impl PassDefinitions {
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let cur_range = &module.tokens.ranges[range_idx];
         debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code);
 
         // Detect which definition we're parsing
-        let mut iter = module.tokens.iter_range(cur_range);
+        let mut iter = module.tokens.iter_range(cur_range.start, cur_range.end);
         loop {
             let next = iter.next();
             if next.is_none() {
                 return Ok(())
             }
src/protocol/parser/pass_imports.rs
@@ -59,13 +59,13 @@ impl PassImport {
         &mut self, modules: &[Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize
     ) -> Result<(), ParseError> {
         let module = &modules[module_idx];
         let import_range = &module.tokens.ranges[range_idx];
         debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import);
 
-        let mut iter = module.tokens.iter_range(import_range);
+        let mut iter = module.tokens.iter_range(import_range.start, import_range.end);
 
         // Consume "import"
         let (_import_ident, import_span) =
             consume_any_ident(&module.source, &mut iter)?;
         debug_assert_eq!(_import_ident, KW_IMPORT);
src/protocol/parser/pass_symbols.rs
@@ -109,13 +109,13 @@ impl PassSymbols {
         Ok(())
     }
 
     fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> {
         let module = &mut modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
-        let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32);
 
         // Consume pragma name
         let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?;
 
         // Consume pragma values
         if pragma_section == b"#module" {
@@ -123,15 +123,18 @@ impl PassSymbols {
             if self.has_pragma_module {
                 return Err(ParseError::new_error_str_at_pos(&module.source, pragma_start, "module name is defined twice"));
             }
 
             // Consume the domain-name
             let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
-            if iter.next().is_some() {
-                return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
-            }
 
+            // TODO: Fix with newer token range parsing
+            module.tokens.ranges[range_idx as usize].end = iter.token_index();
+            // if iter.next().is_some() {
+            //     return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name"));
+            // }
+
             // Add to heap and symbol table
             let pragma_span = InputSpan::from_positions(pragma_start, module_span.end);
             let module_name = ctx.pool.intern(module_name);
             let pragma_id = ctx.heap.alloc_pragma(|this| Pragma::Module(PragmaModule{
                 this,
@@ -185,13 +188,13 @@ impl PassSymbols {
         let module = &modules[module_idx];
         let range = &module.tokens.ranges[range_idx];
         let definition_span = InputSpan::from_positions(
             module.tokens.start_pos(range),
             module.tokens.end_pos(range)
         );
-        let mut iter = module.tokens.iter_range(range);
+        let mut iter = module.tokens.iter_range(range.start, range.end);
 
         // First ident must be type of symbol
         let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
 
         // Retrieve identifier of definition
         let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
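
Note: the hunks above work together. The pragma pass now iterates from the pragma's start to the end of the entire token buffer, because the tokenizer does not yet record a reliable end for pragma ranges; once the pragma is consumed, the pass writes the true end back. A sketch of the combined pattern, reusing the diff's own names (not compilable stand-alone):

    let range = &module.tokens.ranges[range_idx];
    // Iterate past the (possibly stale) stored end, up to the buffer's end.
    let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32);
    let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?;
    // Record where the pragma actually ended so later passes stop there.
    module.tokens.ranges[range_idx].end = iter.token_index();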
src/protocol/parser/pass_tokenizer.rs
@@ -80,18 +80,16 @@ impl PassTokenizer {
                 }
             } else if self.is_line_comment_start(c, source) {
                 self.consume_line_comment(source, target)?;
             } else if self.is_block_comment_start(c, source) {
                 self.consume_block_comment(source, target)?;
             } else if is_whitespace(c) {
-                let contained_newline = self.consume_whitespace(source);
-                if contained_newline {
-                    let range = &target.ranges[self.stack_idx];
-                    if range.range_kind == TokenRangeKind::Pragma {
-                        self.pop_range(target, target.tokens.len() as u32);
-                    }
-                }
+                self.consume_whitespace(source);
+                let range = &target.ranges[self.stack_idx];
+                if range.range_kind == TokenRangeKind::Pragma {
+                    self.pop_range(target, target.tokens.len() as u32);
+                }
             } else {
                 let was_punctuation = self.maybe_parse_punctuation(c, source, target)?;
                 if let Some((token, token_pos)) = was_punctuation {
                     if token == TokenKind::OpenCurly {
                         self.curly_stack.push(token_pos);
@@ -506,16 +504,15 @@ impl PassTokenizer {
         let mut end_pos = source.pos();
         debug_assert_eq!(begin_pos.line, end_pos.line);
 
         // Modify offset to not include the newline characters
         if cur_char == b'\n' {
             if prev_char == b'\r' {
                 end_pos.offset -= 2;
             } else {
                 end_pos.offset -= 1;
             }
 
             // Consume final newline
             source.consume();
         } else {
             // End of comment was due to EOF
             debug_assert!(source.next().is_none())
         }
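
Note: the offset arithmetic in the last hunk keeps the line terminator out of a line comment's source span. A self-contained toy version of the same rule (hypothetical helper, not code from this changeset):

    // Given the byte offset just past a line comment's terminator, step back
    // over "\n" or "\r\n" so the recorded span excludes the newline bytes.
    fn trim_terminator(text: &[u8], mut end: usize) -> usize {
        if end >= 1 && text[end - 1] == b'\n' {
            end -= 1; // plain LF: offset -= 1
            if end >= 1 && text[end - 1] == b'\r' {
                end -= 1; // CRLF: offset -= 2 in total
            }
        }
        end // comment ended at EOF: offset unchanged
    }

    fn main() {
        assert_eq!(trim_terminator(b"// hi\r\n", 7), 5);
        assert_eq!(trim_terminator(b"// hi\n", 6), 5);
        assert_eq!(trim_terminator(b"// hi", 5), 5);
    }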
src/protocol/parser/tokens.rs
@@ -209,14 +209,15 @@ pub struct TokenBuffer {
 
 impl TokenBuffer {
     pub(crate) fn new() -> Self {
         Self{ tokens: Vec::new(), ranges: Vec::new() }
     }
 
-    pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> {
-        TokenIter::new(self, range.start as usize, range.end as usize)
+    pub(crate) fn iter_range<'a>(&'a self, inclusive_start: u32, exclusive_end: u32) -> TokenIter<'a> {
+        debug_assert!(exclusive_end as usize <= self.tokens.len());
+        TokenIter::new(self, inclusive_start as usize, exclusive_end as usize)
     }
 
     pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition {
         self.tokens[range.start as usize].pos
     }
 
@@ -334,12 +335,16 @@ impl<'a> TokenIter<'a> {
             } else {
                 self.cur += 1;
             }
         }
     }
 
+    pub(crate) fn token_index(&self) -> u32 {
+        return self.cur as u32;
+    }
+
     /// Saves the current iteration position, may be passed to `load` to return
     /// the iterator to a previous position.
    pub(crate) fn save(&self) -> (usize, usize) {
        (self.cur, self.end)
    }
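
Note: taken together, these two hunks change TokenIter construction from a &TokenRange argument to explicit half-open [start, end) indices, guarded by a debug assertion, and expose the cursor through token_index() so a parser pass can record how far it consumed. A minimal stand-alone mock of that API shape (the real Token/TokenBuffer types carry more data):

    struct TokenIter { cur: usize, end: usize }

    impl TokenIter {
        fn next(&mut self) -> Option<usize> {
            if self.cur < self.end { self.cur += 1; Some(self.cur - 1) } else { None }
        }
        // New in this changeset: expose the cursor as a token index.
        fn token_index(&self) -> u32 { self.cur as u32 }
    }

    fn iter_range(inclusive_start: u32, exclusive_end: u32, len: usize) -> TokenIter {
        debug_assert!(exclusive_end as usize <= len);
        TokenIter { cur: inclusive_start as usize, end: exclusive_end as usize }
    }

    fn main() {
        let mut iter = iter_range(2, 5, 10); // explicit bounds, not &TokenRange
        while let Some(_idx) = iter.next() {}
        assert_eq!(iter.token_index(), 5);   // cursor records how far we got
    }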
std/std.global.pdl
 #module std.global
 
 // Note: parsing of token ranges and pragma needs to change. For now we insert
 // spaces to work with the current system. Needs to be a system where the
 // pragmas, "func" keywords (and similar keywords) indicate initial points to
 // start parsing.
 
 func get<T>(in<T> input) -> T { #builtin }
 func put<T>(out<T> output, T value) -> #type_void { #builtin }
-func fires<T>(#type_portlike<T>) -> bool { #builtin }
+func fires<T>(#type_portlike <T>) -> bool { #builtin }
 func create<T>(#type_integerlike length) -> T[] { #builtin }
-func length<T>(#type_arraylike<T> array) -> u32 { #builtin }
+func length<T>(#type_arraylike <T> array) -> u32 { #builtin }
 func assert(bool condition) -> #type_void { #builtin }
 func print(string message) -> #type_void { #builtin }
\ No newline at end of file
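
Note: read together with the pass_tokenizer.rs change above, the inserted spaces make sense: a pragma's token range is now popped at the first whitespace, so "#type_portlike <T>" closes the pragma right after its name, while "#type_portlike<T>" would keep "<T>" inside the pragma's range. A toy boundary check illustrating the rule (hypothetical, not the project's tokenizer):

    fn pragma_end(src: &[u8], start: usize) -> usize {
        let mut i = start;
        while i < src.len() && !src[i].is_ascii_whitespace() {
            i += 1;
        }
        i
    }

    fn main() {
        assert_eq!(pragma_end(b"#type_portlike <T>", 0), 14); // "<T>" excluded
        assert_eq!(pragma_end(b"#type_portlike<T>", 0), 17);  // "<T>" swallowed
    }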
tokens.txt
 
deleted file