diff --git a/src/protocol/input_source.rs b/src/protocol/input_source.rs
index f2cf10693ae87bc134610829d91344bcc1f221f1..c2c7cd164eea9c9e433e2ecd569b4c65de2ae4ee 100644
--- a/src/protocol/input_source.rs
+++ b/src/protocol/input_source.rs
@@ -170,6 +170,14 @@ impl InputSource {
         return lookup;
     }
 
+    /// Returns the 1-based column of `pos` within its line, i.e. the distance
+    /// from the start of line `pos.line` to `pos.offset`, plus one. Calling this
+    /// incurs a read lock, so don't spam it in happy-path compiler code.
+    pub(crate) fn get_column(&self, pos: InputPosition) -> u32 {
+        let line_start = self.lookup_line_start_offset(pos.line);
+        return pos.offset - line_start + 1;
+    }
+
     /// Retrieves offset at which line starts (right after newline)
     fn lookup_line_start_offset(&self, line_number: u32) -> u32 {
         let lookup = self.get_lookup();
diff --git a/src/protocol/mod.rs b/src/protocol/mod.rs
index f2b4fd7cdd1de22103ea9a251f737a130107d5ee..da5b60c9ceadb1d990a1c7bc6fa7a78e2b4ea2c9 100644
--- a/src/protocol/mod.rs
+++ b/src/protocol/mod.rs
@@ -6,6 +6,7 @@ mod parser;
 
 pub(crate) mod ast;
 pub(crate) mod ast_writer;
+mod token_writer;
 
 use std::sync::Mutex;
 
diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs
index 79f4c0fed6768f538799073c591e410813060cff..59235a8cd2397b44eea99458f897fb910aeb2959 100644
--- a/src/protocol/parser/mod.rs
+++ b/src/protocol/parser/mod.rs
@@ -32,6 +32,7 @@ use crate::protocol::input_source::*;
 
 use crate::protocol::ast_writer::ASTWriter;
 use crate::protocol::parser::type_table::PolymorphicVariable;
+use crate::protocol::token_writer::TokenWriter;
 
 const REOWOLF_PATH_ENV: &'static str = "REOWOLF_ROOT"; // first lookup reowolf path
 const REOWOLF_PATH_DIR: &'static str = "std"; // then try folder in current working directory
@@ -133,6 +134,7 @@ pub struct Parser {
     pass_rewriting: PassRewriting,
     pass_stack_size: PassStackSize,
     // Compiler options
+    pub write_tokens_to: Option<String>,
     pub write_ast_to: Option<String>,
     pub(crate) arch: TargetArch,
 }
@@ -154,6 +156,7 @@ impl Parser {
             pass_typing: PassTyping::new(),
             pass_rewriting: PassRewriting::new(),
             pass_stack_size: PassStackSize::new(),
+            write_tokens_to: None,
             write_ast_to: None,
             arch: TargetArch::new(),
         };
@@ -216,6 +219,14 @@ impl Parser {
             arch: &self.arch,
         };
 
+        if let Some(filename) = &self.write_tokens_to {
+            // Debugging aid only: failing to create the dump file must not abort compilation.
+            match std::fs::File::create(filename) {
+                Ok(mut file) => TokenWriter::new().write(&mut file, &self.modules),
+                Err(err) => eprintln!("failed to create token dump '{}': {}", filename, err),
+            }
+        }
+
         // Advance all modules to the phase where all symbols are scanned
         for module_idx in 0..self.modules.len() {
             self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs
index ccf6982e268efdeaf5a532c9644a6803fd9d148a..f3f61764315b9abefdef709ab89cea174e080af6 100644
--- a/src/protocol/parser/tokens.rs
+++ b/src/protocol/parser/tokens.rs
@@ -78,7 +78,7 @@ pub enum TokenKind {
 impl TokenKind {
     /// Returns true if the next expected token is the special `TokenKind::SpanEnd` token. This is
     /// the case for tokens of variable length (e.g. an identifier).
-    fn has_span_end(&self) -> bool {
+    pub(crate) fn has_span_end(&self) -> bool {
         return *self <= TokenKind::BlockComment
     }
 
diff --git a/src/protocol/token_writer.rs b/src/protocol/token_writer.rs
new file mode 100644
index 0000000000000000000000000000000000000000..37d52770c34a4efb22b06de483e15060d773c12b
--- /dev/null
+++ b/src/protocol/token_writer.rs
@@ -0,0 +1,157 @@
+#![allow(dead_code)]
+
+use std::fmt::Write;
+use std::io::Write as IOWrite;
+
+use crate::protocol::input_source::{InputSource, InputSpan};
+use crate::protocol::parser::Module;
+use crate::protocol::tokens::{Token, TokenBuffer, TokenKind, TokenRange, TokenIter, TokenRangeKind};
+
+/// Debugging aid that renders the token buffers of parsed modules as an
+/// indented, human-readable tree of token ranges.
+pub(crate) struct TokenWriter {
+    // Scratch buffer in which the whole dump is assembled before it is written out.
+    buffer: String,
+}
+
+impl TokenWriter {
+    /// Creates a writer with a preallocated scratch buffer.
+    pub(crate) fn new() -> Self {
+        return Self{
+            buffer: String::with_capacity(4096),
+        }
+    }
+
+    /// Renders the tokens of every module in `modules` and hands the result to
+    /// `w` with a single `write_all`. Panics if that write fails.
+    pub(crate) fn write<W: IOWrite>(&mut self, w: &mut W, modules: &[Module]) {
+        self.buffer.clear();
+        for module in modules {
+            self.write_module_tokens(module);
+        }
+
+        w.write_all(self.buffer.as_bytes()).expect("write tokens");
+    }
+
+    /// Writes the module header followed by all of its top-level token ranges.
+    fn write_module_tokens(&mut self, module: &Module) {
+        self.write_dashed_indent(0);
+
+        match &module.name {
+            Some(name) => writeln!(self.buffer, "Module: {}", name.1.as_str()).unwrap(),
+            None => self.buffer.push_str("Unnamed module\n"),
+        }
+
+        // A negative index means "no (more) ranges", mirroring the sibling and
+        // child indices stored in `TokenRange`.
+        let mut range_index = if module.tokens.ranges.is_empty() { -1 } else { 0 };
+        while range_index >= 0 {
+            range_index = self.write_token_range(
+                &module.source, &module.tokens.tokens, &module.tokens.ranges, range_index, 1
+            );
+        }
+    }
+
+    /// Writes a single token range. Recurses if there are any child ranges.
+    /// Returns the next token range index to iterate over (or a negative
+    /// number, if there are no more sibling ranges).
+    fn write_token_range(&mut self, source: &InputSource, tokens: &[Token], ranges: &[TokenRange], range_index: i32, indent: u32) -> i32 {
+        // Write range kind
+        let range = &ranges[range_index as usize];
+        self.write_dashed_indent(indent);
+        writeln!(self.buffer, "Range: {:?}", range.range_kind).unwrap();
+
+        // Write tokens/lines it spans. `range.end` is exclusive and may equal
+        // `tokens.len()`, so an out-of-bounds index is clamped to the last token;
+        // only an empty token buffer yields no position at all.
+        let pos_at = |index: usize| tokens.get(index).or_else(|| tokens.last()).map(|token| token.pos);
+        let first_token_pos = pos_at(range.start as usize);
+        let last_token_pos = pos_at(range.end as usize);
+
+        self.write_indent(indent);
+        match (first_token_pos, last_token_pos) {
+            (Some(first_pos), Some(last_pos)) => writeln!(
+                self.buffer, "Source: token {} to {}, file {}:{}:{} to {}:{}",
+                range.start, range.end, source.filename,
+                first_pos.line, source.get_column(first_pos),
+                last_pos.line, source.get_column(last_pos)
+            ).unwrap(),
+            _ => writeln!(
+                self.buffer, "Source: token {} to {}, file {} (no tokens)",
+                range.start, range.end, source.filename
+            ).unwrap(),
+        }
+
+        let next_sibling_index = range.next_sibling_idx;
+        if range.num_child_ranges == 0 {
+            // No child ranges, so dump the tokens here
+            debug_assert!(range.first_child_idx < 0);
+            self.write_token_array(source, tokens, range, indent);
+        } else {
+            // Child ranges
+            debug_assert!(range.first_child_idx >= 0);
+            self.write_indent(indent);
+            self.buffer.push_str("Children: [\n");
+
+            let mut child_index = range.first_child_idx;
+            while child_index >= 0 {
+                child_index = self.write_token_range(source, tokens, ranges, child_index, indent + 1);
+            }
+
+            self.write_indent(indent);
+            self.buffer.push_str("]\n");
+        }
+
+        // Wrote everything, return the next sibling token range
+        return next_sibling_index;
+    }
+
+    /// Writes the tokens in `range`, one per line, skipping `SpanEnd` markers.
+    /// For variable-length tokens the spanned source text is appended.
+    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], range: &TokenRange, indent: u32) {
+        self.write_indent(indent);
+        self.buffer.push_str("Tokens: [\n");
+
+        let token_indent = indent + 1;
+        let end_index = (range.end as usize).min(tokens.len());
+        for token_index in range.start as usize..end_index {
+            // Skip uninteresting tokens
+            let token = &tokens[token_index];
+            if token.kind == TokenKind::SpanEnd {
+                continue;
+            }
+
+            self.write_indent(token_indent);
+            write!(self.buffer, "{:?} (index {})", token.kind, token_index).unwrap();
+
+            // A variable-length token is expected to be followed by the token
+            // that marks where its text ends; without one there is no text to show.
+            match tokens.get(token_index + 1) {
+                Some(span_end) if token.kind.has_span_end() => {
+                    let section = source.section_at_span(InputSpan::from_positions(token.pos, span_end.pos));
+                    writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section)).unwrap();
+                },
+                _ => self.buffer.push('\n'),
+            }
+        }
+
+        self.write_indent(indent);
+        self.buffer.push_str("]\n");
+    }
+
+    /// Writes two spaces per indent level followed by a "- " entry marker.
+    fn write_dashed_indent(&mut self, indent: u32) {
+        for _ in 0..indent * 2 {
+            self.buffer.push(' ');
+        }
+        self.buffer.push_str("- ");
+    }
+
+    /// Writes the indentation for a line nested under the dashed entry at level
+    /// `indent`, i.e. aligned with the text that follows the "- " marker.
+    fn write_indent(&mut self, indent: u32) {
+        for _ in 0..(indent + 1) * 2 {
+            self.buffer.push(' ');
+        }
+    }
+}
\ No newline at end of file
diff --git a/tokens.txt b/tokens.txt new file mode 100644 index 0000000000000000000000000000000000000000..0fe2b215cf39f08d382804dffa53845566ea7455 --- /dev/null +++ b/tokens.txt @@ -0,0 +1,500 @@ +- Unnamed module + - Range: Module + Source: token 0 to 167, file std.global.pdl:1:1 to 9:53 + Children: [ + - Range: Pragma + Source: token 0 to 7, file std.global.pdl:1:1 to 3:1 + Tokens: [ + Pragma (index 0) text: #module + Ident (index 2) text: std + Dot (index 4) + Ident (index 5) text: global + ] + - Range: Definition + Source: token 7 to 167, file std.global.pdl:3:1 to 9:53 + Children: [ + - Range: Code + Source: token 7 to 29, file std.global.pdl:3:1 to 3:33 + Tokens: [ + Ident (index 7) text: func + Ident (index 9) text: get + OpenAngle (index 11) + Ident (index 12) text: T + CloseAngle (index 14) + OpenParen (index 15) + Ident (index 16) text: in + OpenAngle (index 18) + Ident (index 19) text: T + CloseAngle (index 21) + Ident (index 22) text: input + CloseParen (index 24) + ArrowRight (index 25) + Ident (index 26) text: T + OpenCurly (index 28) + ] + - Range: Pragma + Source: token 29 to 32, file std.global.pdl:3:33 to 4:1 + Tokens: [ + Pragma (index 29) text: #builtin + CloseCurly
(index 31) + ] + - Range: Definition + Source: token 32 to 167, file std.global.pdl:4:1 to 9:53 + Children: [ + - Range: Code + Source: token 32 to 56, file std.global.pdl:4:1 to 4:40 + Tokens: [ + Ident (index 32) text: func + Ident (index 34) text: put + OpenAngle (index 36) + Ident (index 37) text: T + CloseAngle (index 39) + OpenParen (index 40) + Ident (index 41) text: out + OpenAngle (index 43) + Ident (index 44) text: T + CloseAngle (index 46) + Ident (index 47) text: output + Comma (index 49) + Ident (index 50) text: T + Ident (index 52) text: value + CloseParen (index 54) + ArrowRight (index 55) + ] + - Range: Pragma + Source: token 56 to 167, file std.global.pdl:4:40 to 9:53 + Children: [ + - Range: Code + Source: token 56 to 59, file std.global.pdl:4:40 to 4:53 + Tokens: [ + Pragma (index 56) text: #type_void + OpenCurly (index 58) + ] + - Range: Pragma + Source: token 59 to 62, file std.global.pdl:4:53 to 5:1 + Tokens: [ + Pragma (index 59) text: #builtin + CloseCurly (index 61) + ] + - Range: Definition + Source: token 62 to 167, file std.global.pdl:5:1 to 9:53 + Children: [ + - Range: Code + Source: token 62 to 71, file std.global.pdl:5:1 to 5:15 + Tokens: [ + Ident (index 62) text: func + Ident (index 64) text: fires + OpenAngle (index 66) + Ident (index 67) text: T + CloseAngle (index 69) + OpenParen (index 70) + ] + - Range: Pragma + Source: token 71 to 167, file std.global.pdl:5:15 to 9:53 + Children: [ + - Range: Code + Source: token 71 to 82, file std.global.pdl:5:15 to 5:44 + Tokens: [ + Pragma (index 71) text: #type_portlike + OpenAngle (index 73) + Ident (index 74) text: T + CloseAngle (index 76) + CloseParen (index 77) + ArrowRight (index 78) + Ident (index 79) text: bool + OpenCurly (index 81) + ] + - Range: Pragma + Source: token 82 to 85, file std.global.pdl:5:44 to 6:1 + Tokens: [ + Pragma (index 82) text: #builtin + CloseCurly (index 84) + ] + - Range: Definition + Source: token 85 to 167, file std.global.pdl:6:1 to 9:53 + Children: [ + 
- Range: Code + Source: token 85 to 94, file std.global.pdl:6:1 to 6:16 + Tokens: [ + Ident (index 85) text: func + Ident (index 87) text: create + OpenAngle (index 89) + Ident (index 90) text: T + CloseAngle (index 92) + OpenParen (index 93) + ] + - Range: Pragma + Source: token 94 to 167, file std.global.pdl:6:16 to 9:53 + Children: [ + - Range: Code + Source: token 94 to 105, file std.global.pdl:6:16 to 6:51 + Tokens: [ + Pragma (index 94) text: #type_integerlike + Ident (index 96) text: length + CloseParen (index 98) + ArrowRight (index 99) + Ident (index 100) text: T + OpenSquare (index 102) + CloseSquare (index 103) + OpenCurly (index 104) + ] + - Range: Pragma + Source: token 105 to 108, file std.global.pdl:6:51 to 7:1 + Tokens: [ + Pragma (index 105) text: #builtin + CloseCurly (index 107) + ] + - Range: Definition + Source: token 108 to 167, file std.global.pdl:7:1 to 9:53 + Children: [ + - Range: Code + Source: token 108 to 117, file std.global.pdl:7:1 to 7:16 + Tokens: [ + Ident (index 108) text: func + Ident (index 110) text: length + OpenAngle (index 112) + Ident (index 113) text: T + CloseAngle (index 115) + OpenParen (index 116) + ] + - Range: Pragma + Source: token 117 to 167, file std.global.pdl:7:16 to 9:53 + Children: [ + - Range: Code + Source: token 117 to 130, file std.global.pdl:7:16 to 7:51 + Tokens: [ + Pragma (index 117) text: #type_arraylike + OpenAngle (index 119) + Ident (index 120) text: T + CloseAngle (index 122) + Ident (index 123) text: array + CloseParen (index 125) + ArrowRight (index 126) + Ident (index 127) text: u32 + OpenCurly (index 129) + ] + - Range: Pragma + Source: token 130 to 133, file std.global.pdl:7:51 to 8:1 + Tokens: [ + Pragma (index 130) text: #builtin + CloseCurly (index 132) + ] + - Range: Definition + Source: token 133 to 167, file std.global.pdl:8:1 to 9:53 + Children: [ + - Range: Code + Source: token 133 to 144, file std.global.pdl:8:1 to 8:32 + Tokens: [ + Ident (index 133) text: func + Ident (index 135) 
text: assert + OpenParen (index 137) + Ident (index 138) text: bool + Ident (index 140) text: condition + CloseParen (index 142) + ArrowRight (index 143) + ] + - Range: Pragma + Source: token 144 to 167, file std.global.pdl:8:32 to 9:53 + Children: [ + - Range: Code + Source: token 144 to 147, file std.global.pdl:8:32 to 8:45 + Tokens: [ + Pragma (index 144) text: #type_void + OpenCurly (index 146) + ] + - Range: Pragma + Source: token 147 to 150, file std.global.pdl:8:45 to 9:1 + Tokens: [ + Pragma (index 147) text: #builtin + CloseCurly (index 149) + ] + - Range: Definition + Source: token 150 to 167, file std.global.pdl:9:1 to 9:53 + Children: [ + - Range: Code + Source: token 150 to 161, file std.global.pdl:9:1 to 9:31 + Tokens: [ + Ident (index 150) text: func + Ident (index 152) text: print + OpenParen (index 154) + Ident (index 155) text: string + Ident (index 157) text: message + CloseParen (index 159) + ArrowRight (index 160) + ] + - Range: Pragma + Source: token 161 to 167, file std.global.pdl:9:31 to 9:53 + Children: [ + - Range: Code + Source: token 161 to 164, file std.global.pdl:9:31 to 9:44 + Tokens: [ + Pragma (index 161) text: #type_void + OpenCurly (index 163) + ] + - Range: Pragma + Source: token 164 to 167, file std.global.pdl:9:44 to 9:53 + Tokens: [ + Pragma (index 164) text: #builtin + CloseCurly (index 166) + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] +- Unnamed module + - Range: Module + Source: token 0 to 331, file :2:5 to 43:5 + Children: [ + - Range: Definition + Source: token 0 to 47, file :2:5 to 7:5 + Tokens: [ + Ident (index 0) text: func + Ident (index 2) text: infinite_assert + OpenAngle (index 4) + Ident (index 5) text: T + CloseAngle (index 7) + OpenParen (index 8) + Ident (index 9) text: T + Ident (index 11) text: val + Comma (index 13) + Ident (index 14) text: T + Ident (index 16) text: expected + CloseParen (index 18) + ArrowRight (index 19) + OpenParen (index 20) + CloseParen (index 21) + OpenCurly (index 22) + 
Ident (index 23) text: while + OpenParen (index 25) + Ident (index 26) text: val + NotEqual (index 28) + Ident (index 29) text: expected + CloseParen (index 31) + OpenCurly (index 32) + Ident (index 33) text: print + OpenParen (index 35) + String (index 36) text: "nope!" + CloseParen (index 38) + SemiColon (index 39) + CloseCurly (index 40) + Ident (index 41) text: return + OpenParen (index 43) + CloseParen (index 44) + SemiColon (index 45) + CloseCurly (index 46) + ] + - Range: Definition + Source: token 47 to 200, file :7:5 to 26:5 + Tokens: [ + Ident (index 47) text: primitive + Ident (index 49) text: receiver + OpenParen (index 51) + Ident (index 52) text: in + OpenAngle (index 54) + Ident (index 55) text: u32 + CloseAngle (index 57) + Ident (index 58) text: in_a + Comma (index 60) + Ident (index 61) text: in + OpenAngle (index 63) + Ident (index 64) text: u32 + CloseAngle (index 66) + Ident (index 67) text: in_b + Comma (index 69) + Ident (index 70) text: u32 + Ident (index 72) text: num_sends + CloseParen (index 74) + OpenCurly (index 75) + Ident (index 76) text: auto + Ident (index 78) text: num_from_a + Equal (index 80) + Integer (index 81) text: 0 + SemiColon (index 83) + Ident (index 84) text: auto + Ident (index 86) text: num_from_b + Equal (index 88) + Integer (index 89) text: 0 + SemiColon (index 91) + Ident (index 92) text: while + OpenParen (index 94) + Ident (index 95) text: num_from_a + Plus (index 97) + Ident (index 98) text: num_from_b + OpenAngle (index 100) + Integer (index 101) text: 2 + Star (index 103) + Ident (index 104) text: num_sends + CloseParen (index 106) + OpenCurly (index 107) + Ident (index 108) text: sync + Ident (index 110) text: select + OpenCurly (index 112) + Ident (index 113) text: auto + Ident (index 115) text: v + Equal (index 117) + Ident (index 118) text: get + OpenParen (index 120) + Ident (index 121) text: in_a + CloseParen (index 123) + ArrowRight (index 124) + OpenCurly (index 125) + Ident (index 126) text: print + 
OpenParen (index 128) + String (index 129) text: "got something from A" + CloseParen (index 131) + SemiColon (index 132) + Ident (index 133) text: auto + Ident (index 135) text: _ + Equal (index 137) + Ident (index 138) text: infinite_assert + OpenParen (index 140) + Ident (index 141) text: v + Comma (index 143) + Ident (index 144) text: num_from_a + CloseParen (index 146) + SemiColon (index 147) + Ident (index 148) text: num_from_a + PlusEquals (index 150) + Integer (index 151) text: 1 + SemiColon (index 153) + CloseCurly (index 154) + Ident (index 155) text: auto + Ident (index 157) text: v + Equal (index 159) + Ident (index 160) text: get + OpenParen (index 162) + Ident (index 163) text: in_b + CloseParen (index 165) + ArrowRight (index 166) + OpenCurly (index 167) + Ident (index 168) text: print + OpenParen (index 170) + String (index 171) text: "got something from B" + CloseParen (index 173) + SemiColon (index 174) + Ident (index 175) text: auto + Ident (index 177) text: _ + Equal (index 179) + Ident (index 180) text: infinite_assert + OpenParen (index 182) + Ident (index 183) text: v + Comma (index 185) + Ident (index 186) text: num_from_b + CloseParen (index 188) + SemiColon (index 189) + Ident (index 190) text: num_from_b + PlusEquals (index 192) + Integer (index 193) text: 1 + SemiColon (index 195) + CloseCurly (index 196) + CloseCurly (index 197) + CloseCurly (index 198) + CloseCurly (index 199) + ] + - Range: Definition + Source: token 200 to 260, file :26:5 to 36:5 + Tokens: [ + Ident (index 200) text: primitive + Ident (index 202) text: sender + OpenParen (index 204) + Ident (index 205) text: out + OpenAngle (index 207) + Ident (index 208) text: u32 + CloseAngle (index 210) + Ident (index 211) text: tx + Comma (index 213) + Ident (index 214) text: u32 + Ident (index 216) text: num_sends + CloseParen (index 218) + OpenCurly (index 219) + Ident (index 220) text: auto + Ident (index 222) text: index + Equal (index 224) + Integer (index 225) text: 0 + 
SemiColon (index 227) + Ident (index 228) text: while + OpenParen (index 230) + Ident (index 231) text: index + OpenAngle (index 233) + Ident (index 234) text: num_sends + CloseParen (index 236) + OpenCurly (index 237) + Ident (index 238) text: sync + OpenCurly (index 240) + Ident (index 241) text: put + OpenParen (index 243) + Ident (index 244) text: tx + Comma (index 246) + Ident (index 247) text: index + CloseParen (index 249) + SemiColon (index 250) + Ident (index 251) text: index + PlusEquals (index 253) + Integer (index 254) text: 1 + SemiColon (index 256) + CloseCurly (index 257) + CloseCurly (index 258) + CloseCurly (index 259) + ] + - Range: Definition + Source: token 260 to 331, file :36:5 to 43:5 + Tokens: [ + Ident (index 260) text: composite + Ident (index 262) text: constructor + OpenParen (index 264) + CloseParen (index 265) + OpenCurly (index 266) + Ident (index 267) text: auto + Ident (index 269) text: num_sends + Equal (index 271) + Integer (index 272) text: 15 + SemiColon (index 274) + Ident (index 275) text: channel + Ident (index 277) text: tx_a + ArrowRight (index 279) + Ident (index 280) text: rx_a + SemiColon (index 282) + Ident (index 283) text: channel + Ident (index 285) text: tx_b + ArrowRight (index 287) + Ident (index 288) text: rx_b + SemiColon (index 290) + Ident (index 291) text: new + Ident (index 293) text: sender + OpenParen (index 295) + Ident (index 296) text: tx_a + Comma (index 298) + Ident (index 299) text: num_sends + CloseParen (index 301) + SemiColon (index 302) + Ident (index 303) text: new + Ident (index 305) text: receiver + OpenParen (index 307) + Ident (index 308) text: rx_a + Comma (index 310) + Ident (index 311) text: rx_b + Comma (index 313) + Ident (index 314) text: num_sends + CloseParen (index 316) + SemiColon (index 317) + Ident (index 318) text: new + Ident (index 320) text: sender + OpenParen (index 322) + Ident (index 323) text: tx_b + Comma (index 325) + Ident (index 326) text: num_sends + CloseParen 
(index 328) + SemiColon (index 329) + CloseCurly (index 330) + ] + ]