From 6d6c5b5f07ae295c4b3795013150c355ae7a53c0 2022-03-28 22:18:20 From: MH Date: 2022-03-28 22:18:20 Subject: [PATCH] Attempting to fix token tree construction --- diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index 024c1ea3a753a01b85fb332105ee12d4fb7527f6..607f8caacc21fb71b60c8bca23e60ec45a27fa5d 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -87,7 +87,7 @@ impl PassDefinitions { debug_assert!(cur_range.range_kind == TokenRangeKind::Definition || cur_range.range_kind == TokenRangeKind::Code); // Detect which definition we're parsing - let mut iter = module.tokens.iter_range(cur_range); + let mut iter = module.tokens.iter_range(cur_range.start, cur_range.end); loop { let next = iter.next(); if next.is_none() { diff --git a/src/protocol/parser/pass_imports.rs b/src/protocol/parser/pass_imports.rs index 98c06ff2fa3f4c2d752e446b720efc40f160e2ca..e64e07e87634183cc240d0cf0709336f8c0a7017 100644 --- a/src/protocol/parser/pass_imports.rs +++ b/src/protocol/parser/pass_imports.rs @@ -62,7 +62,7 @@ impl PassImport { let import_range = &module.tokens.ranges[range_idx]; debug_assert_eq!(import_range.range_kind, TokenRangeKind::Import); - let mut iter = module.tokens.iter_range(import_range); + let mut iter = module.tokens.iter_range(import_range.start, import_range.end); // Consume "import" let (_import_ident, import_span) = diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs index f4bbe5365700fae21e6adb76ebfaea3831f22c87..28d7ba5fce047bdc25d8133f5b7cb6beb42f66e8 100644 --- a/src/protocol/parser/pass_symbols.rs +++ b/src/protocol/parser/pass_symbols.rs @@ -112,7 +112,7 @@ impl PassSymbols { fn visit_pragma_range(&mut self, modules: &mut [Module], module_idx: usize, ctx: &mut PassCtx, range_idx: usize) -> Result<(), ParseError> { let module = &mut modules[module_idx]; let range = &module.tokens.ranges[range_idx]; - let mut iter = module.tokens.iter_range(range); + let mut iter = module.tokens.iter_range(range.start, module.tokens.tokens.len() as u32); // Consume pragma name let (pragma_section, pragma_start, _) = consume_pragma(&module.source, &mut iter)?; @@ -126,9 +126,12 @@ impl PassSymbols { // Consume the domain-name let (module_name, module_span) = consume_domain_ident(&module.source, &mut iter)?; - if iter.next().is_some() { - return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); - } + + // TODO: Fix with newer token range parsing + module.tokens.ranges[range_idx as usize].end = iter.token_index(); + // if iter.next().is_some() { + // return Err(ParseError::new_error_str_at_pos(&module.source, iter.last_valid_pos(), "expected end of #module pragma after module name")); + // } // Add to heap and symbol table let pragma_span = InputSpan::from_positions(pragma_start, module_span.end); @@ -188,7 +191,7 @@ impl PassSymbols { module.tokens.start_pos(range), module.tokens.end_pos(range) ); - let mut iter = module.tokens.iter_range(range); + let mut iter = module.tokens.iter_range(range.start, range.end); // First ident must be type of symbol let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap(); diff --git a/src/protocol/parser/pass_tokenizer.rs b/src/protocol/parser/pass_tokenizer.rs index d41b6f65939a7267f7a38fb635d78b51fb634e1d..6cb83d224693328d20a184c3c0bb26eb8c9b2482 100644 --- a/src/protocol/parser/pass_tokenizer.rs +++ b/src/protocol/parser/pass_tokenizer.rs @@ -83,12 +83,10 @@ impl PassTokenizer { } else if self.is_block_comment_start(c, source) { self.consume_block_comment(source, target)?; } else if is_whitespace(c) { - let contained_newline = self.consume_whitespace(source); - if contained_newline { - let range = &target.ranges[self.stack_idx]; - if range.range_kind == TokenRangeKind::Pragma { - self.pop_range(target, target.tokens.len() as u32); - } + self.consume_whitespace(source); + let range = &target.ranges[self.stack_idx]; + if range.range_kind == TokenRangeKind::Pragma { + self.pop_range(target, target.tokens.len() as u32); } } else { let was_punctuation = self.maybe_parse_punctuation(c, source, target)?; @@ -509,10 +507,9 @@ impl PassTokenizer { // Modify offset to not include the newline characters if cur_char == b'\n' { if prev_char == b'\r' { - end_pos.offset -= 2; - } else { end_pos.offset -= 1; } + // Consume final newline source.consume(); } else { diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs index f3f61764315b9abefdef709ab89cea174e080af6..d8c6b7d25b01b89aea578664be187660c0a025bb 100644 --- a/src/protocol/parser/tokens.rs +++ b/src/protocol/parser/tokens.rs @@ -212,8 +212,9 @@ impl TokenBuffer { Self{ tokens: Vec::new(), ranges: Vec::new() } } - pub(crate) fn iter_range<'a>(&'a self, range: &TokenRange) -> TokenIter<'a> { - TokenIter::new(self, range.start as usize, range.end as usize) + pub(crate) fn iter_range<'a>(&'a self, inclusive_start: u32, exclusive_end: u32) -> TokenIter<'a> { + debug_assert!(exclusive_end as usize <= self.tokens.len()); + TokenIter::new(self, inclusive_start as usize, exclusive_end as usize) } pub(crate) fn start_pos(&self, range: &TokenRange) -> InputPosition { @@ -337,6 +338,10 @@ impl<'a> TokenIter<'a> { } } + pub(crate) fn token_index(&self) -> u32 { + return self.cur as u32; + } + /// Saves the current iteration position, may be passed to `load` to return /// the iterator to a previous position. pub(crate) fn save(&self) -> (usize, usize) { diff --git a/std/std.global.pdl b/std/std.global.pdl index 0c67ffe202972f05a301752a1659bdcf367ef0d4..49e3269e94b9c2443b72716c0e2981f16edd1ee0 100644 --- a/std/std.global.pdl +++ b/std/std.global.pdl @@ -1,9 +1,14 @@ #module std.global +// Note: parsing of token ranges and pragma needs to change. For now we insert +// spaces to work with the current system. Needs to be a system where the +// pragmas, "func" keywords (and similar keywords) indicate initial points to +// start parsing. + func get(in input) -> T { #builtin } func put(out output, T value) -> #type_void { #builtin } -func fires(#type_portlike) -> bool { #builtin } +func fires(#type_portlike ) -> bool { #builtin } func create(#type_integerlike length) -> T[] { #builtin } -func length(#type_arraylike array) -> u32 { #builtin } +func length(#type_arraylike array) -> u32 { #builtin } func assert(bool condition) -> #type_void { #builtin } func print(string message) -> #type_void { #builtin } \ No newline at end of file diff --git a/tokens.txt b/tokens.txt deleted file mode 100644 index 0fe2b215cf39f08d382804dffa53845566ea7455..0000000000000000000000000000000000000000 --- a/tokens.txt +++ /dev/null @@ -1,500 +0,0 @@ -- Unnamed module - - Range: Module - Source: token 0 to 167, file std.global.pdl:1:1 to 9:53 - Children: [ - - Range: Pragma - Source: token 0 to 7, file std.global.pdl:1:1 to 3:1 - Tokens: [ - Pragma (index 0) text: #module - Ident (index 2) text: std - Dot (index 4) - Ident (index 5) text: global - ] - - Range: Definition - Source: token 7 to 167, file std.global.pdl:3:1 to 9:53 - Children: [ - - Range: Code - Source: token 7 to 29, file std.global.pdl:3:1 to 3:33 - Tokens: [ - Ident (index 7) text: func - Ident (index 9) text: get - OpenAngle (index 11) - Ident (index 12) text: T - CloseAngle (index 14) - OpenParen (index 15) - Ident (index 16) text: in - OpenAngle (index 18) - Ident (index 19) text: T - CloseAngle (index 21) - Ident (index 22) text: input - CloseParen (index 24) - ArrowRight (index 25) - Ident (index 26) text: T - OpenCurly (index 28) - ] - - Range: Pragma - Source: token 29 to 32, file std.global.pdl:3:33 to 4:1 - Tokens: [ - Pragma (index 29) text: #builtin - CloseCurly (index 31) - ] - - Range: Definition - Source: token 32 to 167, file std.global.pdl:4:1 to 9:53 - Children: [ - - Range: Code - Source: token 32 to 56, file std.global.pdl:4:1 to 4:40 - Tokens: [ - Ident (index 32) text: func - Ident (index 34) text: put - OpenAngle (index 36) - Ident (index 37) text: T - CloseAngle (index 39) - OpenParen (index 40) - Ident (index 41) text: out - OpenAngle (index 43) - Ident (index 44) text: T - CloseAngle (index 46) - Ident (index 47) text: output - Comma (index 49) - Ident (index 50) text: T - Ident (index 52) text: value - CloseParen (index 54) - ArrowRight (index 55) - ] - - Range: Pragma - Source: token 56 to 167, file std.global.pdl:4:40 to 9:53 - Children: [ - - Range: Code - Source: token 56 to 59, file std.global.pdl:4:40 to 4:53 - Tokens: [ - Pragma (index 56) text: #type_void - OpenCurly (index 58) - ] - - Range: Pragma - Source: token 59 to 62, file std.global.pdl:4:53 to 5:1 - Tokens: [ - Pragma (index 59) text: #builtin - CloseCurly (index 61) - ] - - Range: Definition - Source: token 62 to 167, file std.global.pdl:5:1 to 9:53 - Children: [ - - Range: Code - Source: token 62 to 71, file std.global.pdl:5:1 to 5:15 - Tokens: [ - Ident (index 62) text: func - Ident (index 64) text: fires - OpenAngle (index 66) - Ident (index 67) text: T - CloseAngle (index 69) - OpenParen (index 70) - ] - - Range: Pragma - Source: token 71 to 167, file std.global.pdl:5:15 to 9:53 - Children: [ - - Range: Code - Source: token 71 to 82, file std.global.pdl:5:15 to 5:44 - Tokens: [ - Pragma (index 71) text: #type_portlike - OpenAngle (index 73) - Ident (index 74) text: T - CloseAngle (index 76) - CloseParen (index 77) - ArrowRight (index 78) - Ident (index 79) text: bool - OpenCurly (index 81) - ] - - Range: Pragma - Source: token 82 to 85, file std.global.pdl:5:44 to 6:1 - Tokens: [ - Pragma (index 82) text: #builtin - CloseCurly (index 84) - ] - - Range: Definition - Source: token 85 to 167, file std.global.pdl:6:1 to 9:53 - Children: [ - - Range: Code - Source: token 85 to 94, file std.global.pdl:6:1 to 6:16 - Tokens: [ - Ident (index 85) text: func - Ident (index 87) text: create - OpenAngle (index 89) - Ident (index 90) text: T - CloseAngle (index 92) - OpenParen (index 93) - ] - - Range: Pragma - Source: token 94 to 167, file std.global.pdl:6:16 to 9:53 - Children: [ - - Range: Code - Source: token 94 to 105, file std.global.pdl:6:16 to 6:51 - Tokens: [ - Pragma (index 94) text: #type_integerlike - Ident (index 96) text: length - CloseParen (index 98) - ArrowRight (index 99) - Ident (index 100) text: T - OpenSquare (index 102) - CloseSquare (index 103) - OpenCurly (index 104) - ] - - Range: Pragma - Source: token 105 to 108, file std.global.pdl:6:51 to 7:1 - Tokens: [ - Pragma (index 105) text: #builtin - CloseCurly (index 107) - ] - - Range: Definition - Source: token 108 to 167, file std.global.pdl:7:1 to 9:53 - Children: [ - - Range: Code - Source: token 108 to 117, file std.global.pdl:7:1 to 7:16 - Tokens: [ - Ident (index 108) text: func - Ident (index 110) text: length - OpenAngle (index 112) - Ident (index 113) text: T - CloseAngle (index 115) - OpenParen (index 116) - ] - - Range: Pragma - Source: token 117 to 167, file std.global.pdl:7:16 to 9:53 - Children: [ - - Range: Code - Source: token 117 to 130, file std.global.pdl:7:16 to 7:51 - Tokens: [ - Pragma (index 117) text: #type_arraylike - OpenAngle (index 119) - Ident (index 120) text: T - CloseAngle (index 122) - Ident (index 123) text: array - CloseParen (index 125) - ArrowRight (index 126) - Ident (index 127) text: u32 - OpenCurly (index 129) - ] - - Range: Pragma - Source: token 130 to 133, file std.global.pdl:7:51 to 8:1 - Tokens: [ - Pragma (index 130) text: #builtin - CloseCurly (index 132) - ] - - Range: Definition - Source: token 133 to 167, file std.global.pdl:8:1 to 9:53 - Children: [ - - Range: Code - Source: token 133 to 144, file std.global.pdl:8:1 to 8:32 - Tokens: [ - Ident (index 133) text: func - Ident (index 135) text: assert - OpenParen (index 137) - Ident (index 138) text: bool - Ident (index 140) text: condition - CloseParen (index 142) - ArrowRight (index 143) - ] - - Range: Pragma - Source: token 144 to 167, file std.global.pdl:8:32 to 9:53 - Children: [ - - Range: Code - Source: token 144 to 147, file std.global.pdl:8:32 to 8:45 - Tokens: [ - Pragma (index 144) text: #type_void - OpenCurly (index 146) - ] - - Range: Pragma - Source: token 147 to 150, file std.global.pdl:8:45 to 9:1 - Tokens: [ - Pragma (index 147) text: #builtin - CloseCurly (index 149) - ] - - Range: Definition - Source: token 150 to 167, file std.global.pdl:9:1 to 9:53 - Children: [ - - Range: Code - Source: token 150 to 161, file std.global.pdl:9:1 to 9:31 - Tokens: [ - Ident (index 150) text: func - Ident (index 152) text: print - OpenParen (index 154) - Ident (index 155) text: string - Ident (index 157) text: message - CloseParen (index 159) - ArrowRight (index 160) - ] - - Range: Pragma - Source: token 161 to 167, file std.global.pdl:9:31 to 9:53 - Children: [ - - Range: Code - Source: token 161 to 164, file std.global.pdl:9:31 to 9:44 - Tokens: [ - Pragma (index 161) text: #type_void - OpenCurly (index 163) - ] - - Range: Pragma - Source: token 164 to 167, file std.global.pdl:9:44 to 9:53 - Tokens: [ - Pragma (index 164) text: #builtin - CloseCurly (index 166) - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] - ] -- Unnamed module - - Range: Module - Source: token 0 to 331, file :2:5 to 43:5 - Children: [ - - Range: Definition - Source: token 0 to 47, file :2:5 to 7:5 - Tokens: [ - Ident (index 0) text: func - Ident (index 2) text: infinite_assert - OpenAngle (index 4) - Ident (index 5) text: T - CloseAngle (index 7) - OpenParen (index 8) - Ident (index 9) text: T - Ident (index 11) text: val - Comma (index 13) - Ident (index 14) text: T - Ident (index 16) text: expected - CloseParen (index 18) - ArrowRight (index 19) - OpenParen (index 20) - CloseParen (index 21) - OpenCurly (index 22) - Ident (index 23) text: while - OpenParen (index 25) - Ident (index 26) text: val - NotEqual (index 28) - Ident (index 29) text: expected - CloseParen (index 31) - OpenCurly (index 32) - Ident (index 33) text: print - OpenParen (index 35) - String (index 36) text: "nope!" - CloseParen (index 38) - SemiColon (index 39) - CloseCurly (index 40) - Ident (index 41) text: return - OpenParen (index 43) - CloseParen (index 44) - SemiColon (index 45) - CloseCurly (index 46) - ] - - Range: Definition - Source: token 47 to 200, file :7:5 to 26:5 - Tokens: [ - Ident (index 47) text: primitive - Ident (index 49) text: receiver - OpenParen (index 51) - Ident (index 52) text: in - OpenAngle (index 54) - Ident (index 55) text: u32 - CloseAngle (index 57) - Ident (index 58) text: in_a - Comma (index 60) - Ident (index 61) text: in - OpenAngle (index 63) - Ident (index 64) text: u32 - CloseAngle (index 66) - Ident (index 67) text: in_b - Comma (index 69) - Ident (index 70) text: u32 - Ident (index 72) text: num_sends - CloseParen (index 74) - OpenCurly (index 75) - Ident (index 76) text: auto - Ident (index 78) text: num_from_a - Equal (index 80) - Integer (index 81) text: 0 - SemiColon (index 83) - Ident (index 84) text: auto - Ident (index 86) text: num_from_b - Equal (index 88) - Integer (index 89) text: 0 - SemiColon (index 91) - Ident (index 92) text: while - OpenParen (index 94) - Ident (index 95) text: num_from_a - Plus (index 97) - Ident (index 98) text: num_from_b - OpenAngle (index 100) - Integer (index 101) text: 2 - Star (index 103) - Ident (index 104) text: num_sends - CloseParen (index 106) - OpenCurly (index 107) - Ident (index 108) text: sync - Ident (index 110) text: select - OpenCurly (index 112) - Ident (index 113) text: auto - Ident (index 115) text: v - Equal (index 117) - Ident (index 118) text: get - OpenParen (index 120) - Ident (index 121) text: in_a - CloseParen (index 123) - ArrowRight (index 124) - OpenCurly (index 125) - Ident (index 126) text: print - OpenParen (index 128) - String (index 129) text: "got something from A" - CloseParen (index 131) - SemiColon (index 132) - Ident (index 133) text: auto - Ident (index 135) text: _ - Equal (index 137) - Ident (index 138) text: infinite_assert - OpenParen (index 140) - Ident (index 141) text: v - Comma (index 143) - Ident (index 144) text: num_from_a - CloseParen (index 146) - SemiColon (index 147) - Ident (index 148) text: num_from_a - PlusEquals (index 150) - Integer (index 151) text: 1 - SemiColon (index 153) - CloseCurly (index 154) - Ident (index 155) text: auto - Ident (index 157) text: v - Equal (index 159) - Ident (index 160) text: get - OpenParen (index 162) - Ident (index 163) text: in_b - CloseParen (index 165) - ArrowRight (index 166) - OpenCurly (index 167) - Ident (index 168) text: print - OpenParen (index 170) - String (index 171) text: "got something from B" - CloseParen (index 173) - SemiColon (index 174) - Ident (index 175) text: auto - Ident (index 177) text: _ - Equal (index 179) - Ident (index 180) text: infinite_assert - OpenParen (index 182) - Ident (index 183) text: v - Comma (index 185) - Ident (index 186) text: num_from_b - CloseParen (index 188) - SemiColon (index 189) - Ident (index 190) text: num_from_b - PlusEquals (index 192) - Integer (index 193) text: 1 - SemiColon (index 195) - CloseCurly (index 196) - CloseCurly (index 197) - CloseCurly (index 198) - CloseCurly (index 199) - ] - - Range: Definition - Source: token 200 to 260, file :26:5 to 36:5 - Tokens: [ - Ident (index 200) text: primitive - Ident (index 202) text: sender - OpenParen (index 204) - Ident (index 205) text: out - OpenAngle (index 207) - Ident (index 208) text: u32 - CloseAngle (index 210) - Ident (index 211) text: tx - Comma (index 213) - Ident (index 214) text: u32 - Ident (index 216) text: num_sends - CloseParen (index 218) - OpenCurly (index 219) - Ident (index 220) text: auto - Ident (index 222) text: index - Equal (index 224) - Integer (index 225) text: 0 - SemiColon (index 227) - Ident (index 228) text: while - OpenParen (index 230) - Ident (index 231) text: index - OpenAngle (index 233) - Ident (index 234) text: num_sends - CloseParen (index 236) - OpenCurly (index 237) - Ident (index 238) text: sync - OpenCurly (index 240) - Ident (index 241) text: put - OpenParen (index 243) - Ident (index 244) text: tx - Comma (index 246) - Ident (index 247) text: index - CloseParen (index 249) - SemiColon (index 250) - Ident (index 251) text: index - PlusEquals (index 253) - Integer (index 254) text: 1 - SemiColon (index 256) - CloseCurly (index 257) - CloseCurly (index 258) - CloseCurly (index 259) - ] - - Range: Definition - Source: token 260 to 331, file :36:5 to 43:5 - Tokens: [ - Ident (index 260) text: composite - Ident (index 262) text: constructor - OpenParen (index 264) - CloseParen (index 265) - OpenCurly (index 266) - Ident (index 267) text: auto - Ident (index 269) text: num_sends - Equal (index 271) - Integer (index 272) text: 15 - SemiColon (index 274) - Ident (index 275) text: channel - Ident (index 277) text: tx_a - ArrowRight (index 279) - Ident (index 280) text: rx_a - SemiColon (index 282) - Ident (index 283) text: channel - Ident (index 285) text: tx_b - ArrowRight (index 287) - Ident (index 288) text: rx_b - SemiColon (index 290) - Ident (index 291) text: new - Ident (index 293) text: sender - OpenParen (index 295) - Ident (index 296) text: tx_a - Comma (index 298) - Ident (index 299) text: num_sends - CloseParen (index 301) - SemiColon (index 302) - Ident (index 303) text: new - Ident (index 305) text: receiver - OpenParen (index 307) - Ident (index 308) text: rx_a - Comma (index 310) - Ident (index 311) text: rx_b - Comma (index 313) - Ident (index 314) text: num_sends - CloseParen (index 316) - SemiColon (index 317) - Ident (index 318) text: new - Ident (index 320) text: sender - OpenParen (index 322) - Ident (index 323) text: tx_b - Comma (index 325) - Ident (index 326) text: num_sends - CloseParen (index 328) - SemiColon (index 329) - CloseCurly (index 330) - ] - ]