Files @ 42c130e76c4b
Branch filter:

Location: CSY/reowolf/src/protocol/parser/mod.rs

42c130e76c4b 15.6 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
mh
Add number of rounds to random test component
#[macro_use] mod visitor;
pub(crate) mod symbol_table;
pub(crate) mod type_table;
pub(crate) mod tokens;
pub(crate) mod token_parsing;
pub(crate) mod pass_tokenizer;
pub(crate) mod pass_symbols;
pub(crate) mod pass_imports;
pub(crate) mod pass_definitions;
pub(crate) mod pass_definitions_types;
pub(crate) mod pass_validation_linking;
pub(crate) mod pass_rewriting;
pub(crate) mod pass_typing;
pub(crate) mod pass_stack_size;

use tokens::*;
use crate::collections::*;
use visitor::Visitor;
use pass_tokenizer::PassTokenizer;
use pass_symbols::PassSymbols;
use pass_imports::PassImport;
use pass_definitions::PassDefinitions;
use pass_validation_linking::PassValidationLinking;
use pass_typing::{PassTyping, ResolveQueue};
use pass_rewriting::PassRewriting;
use pass_stack_size::PassStackSize;
use symbol_table::*;
use type_table::*;

use crate::protocol::ast::*;
use crate::protocol::input_source::*;

use crate::protocol::ast_writer::ASTWriter;
use crate::protocol::parser::type_table::PolymorphicVariable;
use crate::protocol::token_writer::TokenWriter;

/// Name of the environment variable that is consulted first when looking for
/// the standard library directory.
const REOWOLF_PATH_ENV: &str = "REOWOLF_ROOT";
/// Folder, relative to the current working directory, that is used as the
/// standard library directory when the environment variable is not set.
const REOWOLF_PATH_DIR: &str = "std";

/// The compilation phase a module has reached.
///
/// The derived ordering follows declaration order, so an earlier phase
/// compares as smaller than a later one.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub enum ModuleCompilationPhase {
    /// Source is tokenized.
    Tokenized,
    /// All definitions are linked to their type class.
    SymbolsScanned,
    /// All imports are added to the symbol table.
    ImportsResolved,
    /// Produced the AST for the entire module.
    DefinitionsParsed,
    /// Added all definitions to the type table.
    TypesAddedToTable,
    /// AST is traversed and has linked the required AST nodes.
    ValidatedAndLinked,
    /// Type inference and checking has been performed.
    Typed,
    /// Special AST nodes are rewritten into regular AST nodes.
    Rewritten,
    // When we continue with the compiler:
    // StackSize
}

/// A single source file known to the parser, together with its tokens and the
/// compilation phase it has reached so far.
pub struct Module {
    /// The input source this module was created from.
    pub source: InputSource,
    /// Token buffer filled by the tokenizer pass when the module is fed.
    pub tokens: TokenBuffer,
    /// `true` for modules the parser feeds itself while loading the standard
    /// library, `false` for modules fed through `Parser::feed`.
    pub is_compiler_file: bool, // TODO: @Hack
    /// Root ID of the module. Initialised as invalid when the module is fed;
    /// presumably assigned by a later pass (TODO confirm which one).
    pub root_id: RootId,
    /// Optional module name with the ID of an associated pragma. `None` when
    /// the module is fed.
    pub name: Option<(PragmaId, StringRef<'static>)>,
    /// Optional module version with the ID of an associated pragma. `None`
    /// when the module is fed.
    pub version: Option<(PragmaId, i64)>,
    /// Compilation phase reached so far; starts at `Tokenized`.
    pub phase: ModuleCompilationPhase,
}

/// Type IDs of all builtin types.
///
/// Every field starts out invalid (see `TargetArch::new`) and receives its
/// real value in `Parser::new`, where the builtin types are inserted into the
/// type table.
pub struct TargetArch {
    pub void_type_id: TypeId,
    pub message_type_id: TypeId,
    pub bool_type_id: TypeId,
    // Unsigned integers
    pub uint8_type_id: TypeId,
    pub uint16_type_id: TypeId,
    pub uint32_type_id: TypeId,
    pub uint64_type_id: TypeId,
    // Signed integers
    pub sint8_type_id: TypeId,
    pub sint16_type_id: TypeId,
    pub sint32_type_id: TypeId,
    pub sint64_type_id: TypeId,
    pub char_type_id: TypeId,
    pub string_type_id: TypeId,
    // The types below are registered with a polymorphic variable in
    // `Parser::new`.
    pub array_type_id: TypeId,
    pub slice_type_id: TypeId,
    pub input_type_id: TypeId,
    pub output_type_id: TypeId,
    pub pointer_type_id: TypeId,
}

impl TargetArch {
    /// Creates a `TargetArch` in which every builtin type ID is still
    /// invalid. The real IDs are assigned afterwards by the caller.
    fn new() -> Self {
        // Short alias for the placeholder constructor used for every field.
        let invalid = TypeId::new_invalid;

        Self {
            void_type_id: invalid(),
            bool_type_id: invalid(),
            message_type_id: invalid(),
            uint8_type_id: invalid(),
            uint16_type_id: invalid(),
            uint32_type_id: invalid(),
            uint64_type_id: invalid(),
            sint8_type_id: invalid(),
            sint16_type_id: invalid(),
            sint32_type_id: invalid(),
            sint64_type_id: invalid(),
            char_type_id: invalid(),
            string_type_id: invalid(),
            array_type_id: invalid(),
            slice_type_id: invalid(),
            input_type_id: invalid(),
            output_type_id: invalid(),
            pointer_type_id: invalid(),
        }
    }
}

/// Borrowed parser state handed to the passes that take a `PassCtx`.
/// `Parser::parse` builds it from the parser's own fields.
pub struct PassCtx<'a> {
    /// Borrow of `Parser::heap`.
    heap: &'a mut Heap,
    /// Borrow of `Parser::symbol_table`.
    symbols: &'a mut SymbolTable,
    /// Borrow of `Parser::string_pool`.
    pool: &'a mut StringPool,
    /// Read-only borrow of `Parser::arch` (the builtin type IDs).
    arch: &'a TargetArch,
}

/// Owns every module fed to it, all data gathered while compiling those
/// modules, and the pass objects that perform the compilation.
pub struct Parser {
    // Storage of all information created/gathered during compilation.
    pub(crate) heap: Heap,
    pub(crate) string_pool: StringPool, // Do not deallocate, holds all strings
    /// All modules fed so far, in feeding order (standard library first).
    pub(crate) modules: Vec<Module>,
    pub(crate) symbol_table: SymbolTable,
    pub(crate) type_table: TypeTable,
    /// Index into `modules` of the first standard library file that was fed.
    pub(crate) global_module_index: usize, // contains globals, implicitly imported everywhere
    // Compiler passes, used as little state machines that keep their memory
    // around.
    pass_tokenizer: PassTokenizer,
    pass_symbols: PassSymbols,
    pass_import: PassImport,
    pass_definitions: PassDefinitions,
    pass_validation: PassValidationLinking,
    pass_typing: PassTyping,
    pass_rewriting: PassRewriting,
    pass_stack_size: PassStackSize,
    // Compiler options
    /// When set, `parse` writes the tokens of all modules to this file path.
    pub write_tokens_to: Option<String>,
    /// When set, `parse` writes the AST to this file path once all passes ran.
    pub write_ast_to: Option<String>,
    /// Type IDs of the builtin types, assigned in `Parser::new`.
    pub(crate) arch: TargetArch,
}

impl Parser {
    pub fn new() -> Result<Self, String> {
        let mut parser = Parser{
            heap: Heap::new(),
            string_pool: StringPool::new(),
            modules: Vec::new(),
            symbol_table: SymbolTable::new(),
            type_table: TypeTable::new(),
            global_module_index: 0,
            pass_tokenizer: PassTokenizer::new(),
            pass_symbols: PassSymbols::new(),
            pass_import: PassImport::new(),
            pass_definitions: PassDefinitions::new(),
            pass_validation: PassValidationLinking::new(),
            pass_typing: PassTyping::new(),
            pass_rewriting: PassRewriting::new(),
            pass_stack_size: PassStackSize::new(),
            write_tokens_to: None,
            write_ast_to: None,
            arch: TargetArch::new(),
        };

        parser.symbol_table.insert_scope(None, SymbolScope::Global);

        // Insert builtin types
        // TODO: At some point use correct values for size/alignment
        parser.arch.void_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Void], false, 0, 1);
        parser.arch.message_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Message], false, 24, 8);
        parser.arch.bool_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Bool], false, 1, 1);
        parser.arch.uint8_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt8], false, 1, 1);
        parser.arch.uint16_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt16], false, 2, 2);
        parser.arch.uint32_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt32], false, 4, 4);
        parser.arch.uint64_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt64], false, 8, 8);
        parser.arch.sint8_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt8], false, 1, 1);
        parser.arch.sint16_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt16], false, 2, 2);
        parser.arch.sint32_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt32], false, 4, 4);
        parser.arch.sint64_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt64], false, 8, 8);
        parser.arch.char_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Character], false, 4, 4);
        parser.arch.string_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::String], false, 24, 8);
        parser.arch.array_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Array, ConcreteTypePart::Void], true, 24, 8);
        parser.arch.slice_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Slice, ConcreteTypePart::Void], true, 16, 4);
        parser.arch.input_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Input, ConcreteTypePart::Void], true, 8, 8);
        parser.arch.output_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8);
        parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8);

        // Parse standard library
        parser.feed_standard_library()?;

        return Ok(parser)
    }

    /// Hands a new `InputSource` to the parser. The source is tokenized right
    /// away and kept as a module until `parse` is called (once all modules
    /// are present). On success the index of the newly added module is
    /// returned.
    pub fn feed(&mut self, source: InputSource) -> Result<usize, ParseError> {
        // Sources fed from the outside are never compiler files.
        let is_compiler_file = false;
        self.feed_internal(source, is_compiler_file)
    }

    /// Runs the compilation passes over all modules that were fed so far.
    ///
    /// The passes run in this order: symbol scanning, import resolution and
    /// definition parsing, building the base types in the type table,
    /// validation/linking, type checking (driven by a resolve queue), and
    /// finally rewriting followed by the stack size pass. Each stage is run
    /// for all modules before the next stage starts.
    ///
    /// If `write_tokens_to` is set, the tokens are written to that file after
    /// the definitions have been parsed. If `write_ast_to` is set, the AST is
    /// written to that file after all passes have completed.
    ///
    /// # Errors
    /// Returns the first `ParseError` produced by any of the passes.
    ///
    /// # Panics
    /// Panics if one of the requested output files cannot be created.
    pub fn parse(&mut self) -> Result<(), ParseError> {
        // Shared context for the passes that run before the type table is
        // built. It is last used by `build_base_types` below.
        let mut pass_ctx = PassCtx{
            heap: &mut self.heap,
            symbols: &mut self.symbol_table,
            pool: &mut self.string_pool,
            arch: &self.arch,
        };

        // Advance all modules to the phase where all symbols are scanned
        for module_idx in 0..self.modules.len() {
            self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
        }

        // With all symbols scanned, perform further compilation until we can
        // add all base types to the type table.
        for module_idx in 0..self.modules.len() {
            self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
            self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?;
        }

        // Optionally dump the tokens of all modules. Note that failing to
        // create the file panics instead of returning an error.
        if let Some(filename) = &self.write_tokens_to {
            let mut writer = TokenWriter::new();
            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
            writer.write(&mut file, &self.modules);
        }

        // Add every known type to the type table
        self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?;

        // Continue compilation with the remaining phases now that the types
        // are all in the type table
        for module_idx in 0..self.modules.len() {
            // A fresh visitor context is built per module, because it carries
            // the index of the module being visited.
            let mut ctx = visitor::Ctx{
                heap: &mut self.heap,
                modules: &mut self.modules,
                module_idx,
                symbols: &mut self.symbol_table,
                types: &mut self.type_table,
                arch: &self.arch,
            };
            self.pass_validation.visit_module(&mut ctx)?;
        }

        // Perform typechecking on all modules
        // First let every module queue its definitions ...
        let mut queue = ResolveQueue::new();
        for module_idx in 0..self.modules.len() {
            let mut ctx = visitor::Ctx{
                heap: &mut self.heap,
                modules: &mut self.modules,
                module_idx,
                symbols: &mut self.symbol_table,
                types: &mut self.type_table,
                arch: &self.arch,
            };
            self.pass_typing.queue_module_definitions(&mut ctx, &mut queue);
        };
        // ... then work through the queue until it is empty. The queue is
        // passed to the handler as well, so it may receive new entries while
        // it is being processed.
        while !queue.is_empty() {
            let top = queue.pop_front().unwrap();
            let mut ctx = visitor::Ctx{
                heap: &mut self.heap,
                modules: &mut self.modules,
                // The queued entry determines which module is being handled
                module_idx: top.root_id.index as usize,
                symbols: &mut self.symbol_table,
                types: &mut self.type_table,
                arch: &self.arch,
            };
            self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?;
        }

        // Rewrite nodes in tree, then prepare for execution of code
        for module_idx in 0..self.modules.len() {
            // The typing queue has been fully processed at this point, so the
            // module is marked as typed before it is rewritten.
            self.modules[module_idx].phase = ModuleCompilationPhase::Typed;
            let mut ctx = visitor::Ctx{
                heap: &mut self.heap,
                modules: &mut self.modules,
                module_idx,
                symbols: &mut self.symbol_table,
                types: &mut self.type_table,
                arch: &self.arch,
            };
            self.pass_rewriting.visit_module(&mut ctx)?;
            self.pass_stack_size.visit_module(&mut ctx)?;
        }

        // Write out desired information
        // As with the token dump: failing to create the file panics.
        if let Some(filename) = &self.write_ast_to {
            let mut writer = ASTWriter::new();
            let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap();
            writer.write_ast(&mut file, &self.heap);
        }

        Ok(())
    }

    /// Tries to find the standard library and feeds its files to the parser
    /// as compiler files.
    ///
    /// The library directory is taken from the environment variable named by
    /// `REOWOLF_PATH_ENV` when that variable is set, and otherwise defaults to
    /// the `REOWOLF_PATH_DIR` folder in the current working directory. The
    /// module index of the first standard library file is stored in
    /// `global_module_index`.
    ///
    /// # Errors
    /// Returns a descriptive message when the library directory does not
    /// exist, when one of the files cannot be read, or when feeding a file to
    /// the parser fails.
    fn feed_standard_library(&mut self) -> Result<(), String> {
        use std::env;
        use std::fs;
        use std::path::Path;

        const FILES: [&str; 2] = [
            "std.global.pdl",
            "std.random.pdl",
        ];

        // Determine base directory: the environment variable wins, otherwise
        // fall back to the folder in the current working directory.
        let base_path = env::var(REOWOLF_PATH_ENV)
            .unwrap_or_else(|_| format!("./{}", REOWOLF_PATH_DIR));

        // Make sure directory exists
        let root = Path::new(&base_path);
        if !root.exists() {
            return Err(format!("std lib root directory '{}' does not exist", base_path));
        }

        // Try to load all standard library files. We might need a more unified
        // way to do this in the future (i.e. a "std" package, containing all
        // of the modules)
        for (file_index, file) in FILES.iter().copied().enumerate() {
            let bytes = fs::read(root.join(file)).map_err(|err| format!(
                "failed to read std lib file '{}' in root directory '{}', because: {}",
                file, base_path, err
            ))?;

            let input_source = InputSource::new(file.to_string(), bytes);

            // A bit of a hack, but shouldn't really happen anyway: the
            // compiler should ship with a decent standard library (at some
            // point)
            let module_index = self
                .feed_internal(input_source, true)
                .map_err(|err| err.to_string())?;

            // The first file holds the globals.
            if file_index == 0 {
                self.global_module_index = module_index;
            }
        }

        Ok(())
    }

    /// Tokenizes `source` and appends it to the list of modules in the
    /// `Tokenized` phase, with an invalid root ID and no name or version.
    /// `is_compiler_file` is stored on the module unchanged. Returns the
    /// index of the new module, or the error produced by the tokenizer.
    fn feed_internal(&mut self, mut source: InputSource, is_compiler_file: bool) -> Result<usize, ParseError> {
        let mut tokens = TokenBuffer::new();
        self.pass_tokenizer.tokenize(&mut source, &mut tokens)?;

        // The new module ends up at the current end of the list.
        let module_index = self.modules.len();
        self.modules.push(Module{
            source,
            tokens,
            is_compiler_file,
            root_id: RootId::new_invalid(),
            name: None,
            version: None,
            phase: ModuleCompilationPhase::Tokenized,
        });

        Ok(module_index)
    }
}

/// Adds a builtin data type to `type_table` and returns the resulting ID.
///
/// `parts` becomes the concrete type. When `has_poly_var` is set, one
/// polymorphic variable with an empty identifier (marked as not in use) is
/// passed along; otherwise no polymorphic variables are passed. `size` and
/// `alignment` are forwarded unchanged.
fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec<ConcreteTypePart>, has_poly_var: bool, size: usize, alignment: usize) -> TypeId {
    const PLACEHOLDER_POLY_VAR: [PolymorphicVariable; 1] = [PolymorphicVariable{
        identifier: Identifier::new_empty(InputSpan::new()),
        is_in_use: false,
    }];

    let poly_vars: &[PolymorphicVariable] = match has_poly_var {
        true => PLACEHOLDER_POLY_VAR.as_slice(),
        false => &[],
    };

    type_table.add_builtin_data_type(ConcreteType{ parts }, poly_vars, size, alignment)
}