Files @ c9800c8f19d7
Branch filter:

Location: CSY/reowolf/src/protocol/token_writer.rs

c9800c8f19d7 4.8 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
mh
Rewrite tokenizer to emit markers iso ranges
#![allow(dead_code)]

use std::fmt::Write;
use std::io::Write as IOWrite;

use crate::protocol::input_source::{InputSource, InputSpan};
use crate::protocol::parser::Module;
use crate::protocol::tokens::{Token, TokenKind, TokenRange};

/// Debugging aid that renders the token ranges and tokens of parsed modules
/// as indented, human-readable text.
///
/// The text is first assembled in an internal buffer, which is reused across
/// calls, and then handed to the caller-supplied writer in one go.
#[derive(Debug)]
pub(crate) struct TokenWriter {
    /// Scratch buffer in which the textual dump is built up.
    buffer: String,
}

// Formatting into the `String` buffer cannot fail at the sink; the results of
// `write!`/`writeln!` are unwrapped so that an error reported by a formatting
// implementation surfaces instead of being silently dropped.
impl TokenWriter {
    /// Creates a writer whose internal buffer is empty but pre-allocated.
    pub(crate) fn new() -> Self {
        Self {
            buffer: String::with_capacity(4096),
        }
    }

    /// Renders the token ranges of every module in `modules` and writes the
    /// resulting text to `w` with a single `write_all` call.
    ///
    /// The internal buffer is cleared first, so every call produces a fresh
    /// dump that only describes `modules`.
    ///
    /// # Panics
    ///
    /// Panics if writing to `w` fails.
    pub(crate) fn write<W: IOWrite>(&mut self, w: &mut W, modules: &[Module]) {
        self.buffer.clear();
        for module in modules {
            self.write_module_tokens(module);
        }

        w.write_all(self.buffer.as_bytes()).expect("write tokens");
    }

    /// Appends the header line of `module` (its name, or a placeholder if it
    /// has none) followed by all of its token ranges.
    fn write_module_tokens(&mut self, module: &Module) {
        self.write_dashed_indent(0);

        match &module.name {
            Some(name) => writeln!(self.buffer, "Module: {}", name.1.as_str()).unwrap(),
            None => self.buffer.push_str("Unnamed module\n"),
        }

        // Start at range 0 (if there is one) and keep following the index
        // returned by `write_token_range` until it turns negative.
        let mut range_index = if module.tokens.ranges.is_empty() { -1 } else { 0 };
        while range_index >= 0 {
            range_index = self.write_token_range(
                &module.source, &module.tokens.tokens, &module.tokens.ranges, range_index, 1
            );
        }
    }

    /// Writes a single token range. Recurses if there are any child ranges.
    /// Returns the next token range index to iterate over (or a negative
    /// number, if there are no more sibling ranges).
    ///
    /// A range boundary that lies past the end of `tokens` is reported using
    /// the position of the last token. If `tokens` is empty no positions are
    /// available, and the source line is written without them.
    fn write_token_range(&mut self, source: &InputSource, tokens: &[Token], ranges: &[TokenRange], range_index: i32, indent: u32) -> i32 {
        // Write range kind
        let range = &ranges[range_index as usize];
        self.write_dashed_indent(indent);
        writeln!(self.buffer, "Range: {:?}", range.range_kind).unwrap();

        // Write tokens/lines it spans. Both boundaries share the same
        // fallback to the last token; `None` only occurs without any tokens.
        let position_at = |index: usize| {
            tokens.get(index).or_else(|| tokens.last()).map(|token| token.pos)
        };

        self.write_indent(indent);
        match (position_at(range.start as usize), position_at(range.end as usize)) {
            (Some(first_pos), Some(last_pos)) => {
                let first_col = source.get_column(first_pos);
                let last_col = source.get_column(last_pos);
                writeln!(
                    self.buffer, "Source: token {} to {}, file {}:{}:{} to {}:{}",
                    range.start, range.end, source.filename,
                    first_pos.line, first_col,
                    last_pos.line, last_col
                ).unwrap();
            },
            _ => {
                writeln!(
                    self.buffer, "Source: token {} to {}, file {} (no tokens)",
                    range.start, range.end, source.filename
                ).unwrap();
            },
        }

        let next_sibling_index = range.next_sibling_idx;
        if range.num_child_ranges == 0 {
            // No child ranges, so dump the tokens here
            debug_assert!(range.first_child_idx < 0);
            self.write_token_array(source, tokens, range, indent);
        } else {
            // Child ranges: walk the sibling chain starting at the first child
            debug_assert!(range.first_child_idx >= 0);
            self.write_indent(indent);
            self.buffer.push_str("Children: [\n");

            let mut child_index = range.first_child_idx;
            while child_index >= 0 {
                child_index = self.write_token_range(source, tokens, ranges, child_index, indent + 1);
            }

            self.write_indent(indent);
            self.buffer.push_str("]\n");
        }

        // Wrote everything, return the next sibling token range
        next_sibling_index
    }

    /// Writes the tokens in the half-open interval `range.start..range.end`,
    /// one per line, enclosed in a `Tokens: [` ... `]` pair.
    ///
    /// `SpanEnd` tokens are not listed themselves. For a token whose kind
    /// reports `has_span_end()`, the source text between that token and the
    /// token following it is appended to its line. Indices beyond the end of
    /// `tokens` are skipped rather than causing a panic.
    fn write_token_array(&mut self, source: &InputSource, tokens: &[Token], range: &TokenRange, indent: u32) {
        self.write_indent(indent);
        self.buffer.push_str("Tokens: [\n");

        let token_indent = indent + 1;
        let first_index = range.start as usize;
        let num_tokens = (range.end as usize).saturating_sub(first_index);
        for (token_index, token) in tokens.iter().enumerate().skip(first_index).take(num_tokens) {
            // Skip uninteresting tokens
            if token.kind == TokenKind::SpanEnd {
                continue;
            }

            self.write_indent(token_indent);
            write!(self.buffer, "{:?} (index {})", token.kind, token_index).unwrap();
            if token.kind.has_span_end() {
                // The next token marks where this token's text ends
                match tokens.get(token_index + 1) {
                    Some(span_end) => {
                        let span = InputSpan::from_positions(token.pos, span_end.pos);
                        let section = source.section_at_span(span);
                        writeln!(self.buffer, " text: {}", String::from_utf8_lossy(section)).unwrap();
                    },
                    // Malformed stream: report it instead of indexing out of bounds
                    None => self.buffer.push_str(" text: <missing span end>\n"),
                }
            } else {
                self.buffer.push('\n');
            }
        }

        self.write_indent(indent);
        self.buffer.push_str("]\n");
    }

    /// Appends `indent * 2` spaces followed by `"- "`, the prefix used for
    /// lines that open a new entry.
    fn write_dashed_indent(&mut self, indent: u32) {
        self.buffer.extend(std::iter::repeat(' ').take((indent * 2) as usize));
        self.buffer.push_str("- ");
    }

    /// Appends `(indent + 1) * 2` spaces, which lines the text up with the
    /// text that follows the `"- "` written by `write_dashed_indent` for the
    /// same `indent`.
    fn write_indent(&mut self, indent: u32) {
        self.buffer.extend(std::iter::repeat(' ').take(((indent + 1) * 2) as usize));
    }
}