#[macro_use] mod visitor; pub(crate) mod symbol_table; pub(crate) mod type_table; pub(crate) mod tokens; pub(crate) mod token_parsing; pub(crate) mod pass_tokenizer; pub(crate) mod pass_symbols; pub(crate) mod pass_imports; pub(crate) mod pass_definitions; pub(crate) mod pass_definitions_types; pub(crate) mod pass_validation_linking; pub(crate) mod pass_rewriting; pub(crate) mod pass_typing; pub(crate) mod pass_stack_size; use tokens::*; use crate::collections::*; use visitor::Visitor; use pass_tokenizer::PassTokenizer; use pass_symbols::PassSymbols; use pass_imports::PassImport; use pass_definitions::PassDefinitions; use pass_validation_linking::PassValidationLinking; use pass_typing::{PassTyping, ResolveQueue}; use pass_rewriting::PassRewriting; use pass_stack_size::PassStackSize; use symbol_table::*; use type_table::*; use crate::protocol::ast::*; use crate::protocol::input_source::*; use crate::protocol::ast_writer::ASTWriter; use crate::protocol::parser::type_table::PolymorphicVariable; use crate::protocol::token_writer::TokenWriter; const REOWOLF_PATH_ENV: &'static str = "REOWOLF_ROOT"; // first lookup reowolf path const REOWOLF_PATH_DIR: &'static str = "std"; // then try folder in current working directory #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ModuleCompilationPhase { Tokenized, // source is tokenized SymbolsScanned, // all definitions are linked to their type class ImportsResolved, // all imports are added to the symbol table DefinitionsParsed, // produced the AST for the entire module TypesAddedToTable, // added all definitions to the type table ValidatedAndLinked, // AST is traversed and has linked the required AST nodes Typed, // Type inference and checking has been performed Rewritten, // Special AST nodes are rewritten into regular AST nodes // When we continue with the compiler: // StackSize } pub struct Module { pub source: InputSource, pub tokens: TokenBuffer, pub is_compiler_file: bool, // TODO: @Hack for custom compiler-only types pub add_to_global_namespace: bool, pub root_id: RootId, pub name: Option<(PragmaId, StringRef<'static>)>, pub version: Option<(PragmaId, i64)>, pub phase: ModuleCompilationPhase, } pub struct TargetArch { pub void_type_id: TypeId, pub message_type_id: TypeId, pub bool_type_id: TypeId, pub uint8_type_id: TypeId, pub uint16_type_id: TypeId, pub uint32_type_id: TypeId, pub uint64_type_id: TypeId, pub sint8_type_id: TypeId, pub sint16_type_id: TypeId, pub sint32_type_id: TypeId, pub sint64_type_id: TypeId, pub char_type_id: TypeId, pub string_type_id: TypeId, pub array_type_id: TypeId, pub slice_type_id: TypeId, pub input_type_id: TypeId, pub output_type_id: TypeId, pub pointer_type_id: TypeId, } impl TargetArch { fn new() -> Self { return Self{ void_type_id: TypeId::new_invalid(), bool_type_id: TypeId::new_invalid(), message_type_id: TypeId::new_invalid(), uint8_type_id: TypeId::new_invalid(), uint16_type_id: TypeId::new_invalid(), uint32_type_id: TypeId::new_invalid(), uint64_type_id: TypeId::new_invalid(), sint8_type_id: TypeId::new_invalid(), sint16_type_id: TypeId::new_invalid(), sint32_type_id: TypeId::new_invalid(), sint64_type_id: TypeId::new_invalid(), char_type_id: TypeId::new_invalid(), string_type_id: TypeId::new_invalid(), array_type_id: TypeId::new_invalid(), slice_type_id: TypeId::new_invalid(), input_type_id: TypeId::new_invalid(), output_type_id: TypeId::new_invalid(), pointer_type_id: TypeId::new_invalid(), } } } pub struct PassCtx<'a> { heap: &'a mut Heap, symbols: &'a mut SymbolTable, pool: &'a mut StringPool, arch: &'a TargetArch, } pub struct Parser { // Storage of all information created/gathered during compilation. pub(crate) heap: Heap, pub(crate) string_pool: StringPool, // Do not deallocate, holds all strings pub(crate) modules: Vec, pub(crate) symbol_table: SymbolTable, pub(crate) type_table: TypeTable, pub(crate) global_module_index: usize, // contains globals, implicitly imported everywhere // Compiler passes, used as little state machine that keep their memory // around. pass_tokenizer: PassTokenizer, pass_symbols: PassSymbols, pass_import: PassImport, pass_definitions: PassDefinitions, pass_validation: PassValidationLinking, pass_typing: PassTyping, pass_rewriting: PassRewriting, pass_stack_size: PassStackSize, // Compiler options pub write_tokens_to: Option, pub write_ast_to: Option, pub std_lib_dir: Option, pub(crate) arch: TargetArch, } impl Parser { pub fn new(std_lib_dir: Option) -> Result { let mut parser = Parser{ heap: Heap::new(), string_pool: StringPool::new(), modules: Vec::new(), symbol_table: SymbolTable::new(), type_table: TypeTable::new(), global_module_index: 0, pass_tokenizer: PassTokenizer::new(), pass_symbols: PassSymbols::new(), pass_import: PassImport::new(), pass_definitions: PassDefinitions::new(), pass_validation: PassValidationLinking::new(), pass_typing: PassTyping::new(), pass_rewriting: PassRewriting::new(), pass_stack_size: PassStackSize::new(), write_tokens_to: None, write_ast_to: None, std_lib_dir, arch: TargetArch::new(), }; parser.symbol_table.insert_scope(None, SymbolScope::Global); // Insert builtin types // TODO: At some point use correct values for size/alignment parser.arch.void_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Void], false, 0, 1); parser.arch.message_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Message], false, 24, 8); parser.arch.bool_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Bool], false, 1, 1); parser.arch.uint8_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt8], false, 1, 1); parser.arch.uint16_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt16], false, 2, 2); parser.arch.uint32_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt32], false, 4, 4); parser.arch.uint64_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt64], false, 8, 8); parser.arch.sint8_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt8], false, 1, 1); parser.arch.sint16_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt16], false, 2, 2); parser.arch.sint32_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt32], false, 4, 4); parser.arch.sint64_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt64], false, 8, 8); parser.arch.char_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Character], false, 4, 4); parser.arch.string_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::String], false, 24, 8); parser.arch.array_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Array, ConcreteTypePart::Void], true, 24, 8); parser.arch.slice_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Slice, ConcreteTypePart::Void], true, 16, 4); parser.arch.input_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Input, ConcreteTypePart::Void], true, 8, 8); parser.arch.output_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8); parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8); // Parse standard library parser.feed_standard_library()?; return Ok(parser) } /// Feeds a new InputSource to the parser, which will tokenize it and store /// it internally for later parsing (when all modules are present). Returns /// the index of the new module. pub fn feed(&mut self, mut source: InputSource) -> Result { return self.feed_internal(source, false, false); } pub fn parse(&mut self) -> Result<(), ParseError> { let mut pass_ctx = PassCtx{ heap: &mut self.heap, symbols: &mut self.symbol_table, pool: &mut self.string_pool, arch: &self.arch, }; // Advance all modules to the phase where all symbols are scanned for module_idx in 0..self.modules.len() { self.pass_symbols.parse(&mut self.modules, module_idx, &mut pass_ctx)?; } // With all symbols scanned, perform further compilation until we can // add all base types to the type table. for module_idx in 0..self.modules.len() { self.pass_import.parse(&mut self.modules, module_idx, &mut pass_ctx)?; self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?; } if let Some(filename) = &self.write_tokens_to { let mut writer = TokenWriter::new(); let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap(); writer.write(&mut file, &self.modules); } // Add every known type to the type table self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?; // Continue compilation with the remaining phases now that the types // are all in the type table for module_idx in 0..self.modules.len() { let mut ctx = visitor::Ctx{ heap: &mut self.heap, modules: &mut self.modules, module_idx, symbols: &mut self.symbol_table, types: &mut self.type_table, arch: &self.arch, }; self.pass_validation.visit_module(&mut ctx)?; } // Perform typechecking on all modules let mut queue = ResolveQueue::new(); for module_idx in 0..self.modules.len() { let mut ctx = visitor::Ctx{ heap: &mut self.heap, modules: &mut self.modules, module_idx, symbols: &mut self.symbol_table, types: &mut self.type_table, arch: &self.arch, }; self.pass_typing.queue_module_definitions(&mut ctx, &mut queue); }; while !queue.is_empty() { let top = queue.pop_front().unwrap(); let mut ctx = visitor::Ctx{ heap: &mut self.heap, modules: &mut self.modules, module_idx: top.root_id.index as usize, symbols: &mut self.symbol_table, types: &mut self.type_table, arch: &self.arch, }; self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?; } // Rewrite nodes in tree, then prepare for execution of code for module_idx in 0..self.modules.len() { self.modules[module_idx].phase = ModuleCompilationPhase::Typed; let mut ctx = visitor::Ctx{ heap: &mut self.heap, modules: &mut self.modules, module_idx, symbols: &mut self.symbol_table, types: &mut self.type_table, arch: &self.arch, }; self.pass_rewriting.visit_module(&mut ctx)?; self.pass_stack_size.visit_module(&mut ctx)?; } // Write out desired information if let Some(filename) = &self.write_ast_to { let mut writer = ASTWriter::new(); let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap(); writer.write_ast(&mut file, &self.heap); } Ok(()) } /// Tries to find the standard library and add the files for parsing. fn feed_standard_library(&mut self) -> Result<(), String> { use std::env; use std::path::{Path, PathBuf}; use std::fs; // Pair is (name, add_to_global_namespace) const FILES: [(&'static str, bool); 3] = [ ("std.global.pdl", true), ("std.internet.pdl", false), ("std.random.pdl", false), ]; // Determine base directory let (base_path, from_env) = if let Ok(path) = env::var(REOWOLF_PATH_ENV) { // Path variable is set (path, true) } else { let path = match self.std_lib_dir.take() { Some(path) => path, None => { let mut path = String::with_capacity(REOWOLF_PATH_DIR.len() + 2); path.push_str("./"); path.push_str(REOWOLF_PATH_DIR); path } }; (path, false) }; // Make sure directory exists let path = Path::new(&base_path); if !path.exists() { let from_env_message = if from_env { format!(" (retrieved from the environment variable '{}')", REOWOLF_PATH_ENV) } else { String::new() }; return Err(format!("std lib root directory '{}'{} does not exist", base_path, from_env_message)); } // Try to load all standard library files. We might need a more unified // way to do this in the future (i.e. a "std" package, containing all // of the modules) let mut file_path = PathBuf::new(); let mut first_file = true; for (file, add_to_global_namespace) in FILES { file_path.clear(); file_path.push(path); file_path.push(file); let source = fs::read(file_path.as_path()); if let Err(err) = source { return Err(format!( "failed to read std lib file '{}' in root directory '{}', because: {}", file, base_path, err )); } let source = source.unwrap(); let input_source = InputSource::new(file.to_string(), source); let module_index = self.feed_internal(input_source, true, add_to_global_namespace); if let Err(err) = module_index { // A bit of a hack, but shouldn't really happen anyway: the // compiler should ship with a decent standard library (at some // point) return Err(format!("{}", err)); } let module_index = module_index.unwrap(); if first_file { self.global_module_index = module_index; first_file = false; } } return Ok(()) } fn feed_internal(&mut self, mut source: InputSource, is_compiler_file: bool, add_to_global_namespace: bool) -> Result { let mut token_buffer = TokenBuffer::new(); self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?; let module = Module{ source, tokens: token_buffer, is_compiler_file, add_to_global_namespace, root_id: RootId::new_invalid(), name: None, version: None, phase: ModuleCompilationPhase::Tokenized, }; let module_index = self.modules.len(); self.modules.push(module); return Ok(module_index); } } fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec, has_poly_var: bool, size: usize, alignment: usize) -> TypeId { const POLY_VARS: [PolymorphicVariable; 1] = [PolymorphicVariable{ identifier: Identifier::new_empty(InputSpan::new()), is_in_use: false, }]; let concrete_type = ConcreteType{ parts }; let poly_var = if has_poly_var { POLY_VARS.as_slice() } else { &[] }; return type_table.add_builtin_data_type(concrete_type, poly_var, size, alignment); }