diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index e55bf6b31adb04d40e19891ed39a219762f616dd..8f1500639951df5c37527325b710f7e2f3e88097 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -30,8 +30,12 @@ use type_table::*; use crate::protocol::ast::*; use crate::protocol::input_source::*; -use crate::protocol::ast_printer::ASTWriter; +use crate::protocol::ast_writer::ASTWriter; use crate::protocol::parser::type_table::PolymorphicVariable; +use crate::protocol::token_writer::TokenWriter; + +const REOWOLF_PATH_ENV: &'static str = "REOWOLF_ROOT"; // first lookup reowolf path +const REOWOLF_PATH_DIR: &'static str = "std"; // then try folder in current working directory #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ModuleCompilationPhase { @@ -48,10 +52,10 @@ pub enum ModuleCompilationPhase { } pub struct Module { - // Buffers pub source: InputSource, pub tokens: TokenBuffer, - // Identifiers + pub is_compiler_file: bool, // TODO: @Hack for custom compiler-only types + pub add_to_global_namespace: bool, pub root_id: RootId, pub name: Option<(PragmaId, StringRef<'static>)>, pub version: Option<(PragmaId, i64)>, @@ -118,6 +122,7 @@ pub struct Parser { pub(crate) modules: Vec, pub(crate) symbol_table: SymbolTable, pub(crate) type_table: TypeTable, + pub(crate) global_module_index: usize, // contains globals, implicitly imported everywhere // Compiler passes, used as little state machine that keep their memory // around. pass_tokenizer: PassTokenizer, @@ -129,18 +134,21 @@ pub struct Parser { pass_rewriting: PassRewriting, pass_stack_size: PassStackSize, // Compiler options + pub write_tokens_to: Option, pub write_ast_to: Option, + pub std_lib_dir: Option, pub(crate) arch: TargetArch, } impl Parser { - pub fn new() -> Self { + pub fn new(std_lib_dir: Option) -> Result { let mut parser = Parser{ heap: Heap::new(), string_pool: StringPool::new(), modules: Vec::new(), symbol_table: SymbolTable::new(), type_table: TypeTable::new(), + global_module_index: 0, pass_tokenizer: PassTokenizer::new(), pass_symbols: PassSymbols::new(), pass_import: PassImport::new(), @@ -149,7 +157,9 @@ impl Parser { pass_typing: PassTyping::new(), pass_rewriting: PassRewriting::new(), pass_stack_size: PassStackSize::new(), + write_tokens_to: None, write_ast_to: None, + std_lib_dir, arch: TargetArch::new(), }; @@ -176,78 +186,17 @@ impl Parser { parser.arch.output_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8); parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8); - // Insert builtin functions - fn quick_type(variants: &[ParserTypeVariant]) -> ParserType { - let mut t = ParserType{ elements: Vec::with_capacity(variants.len()), full_span: InputSpan::new() }; - for variant in variants { - t.elements.push(ParserTypeElement{ element_span: InputSpan::new(), variant: variant.clone() }); - } - t - } + // Parse standard library + parser.feed_standard_library()?; - use ParserTypeVariant as PTV; - insert_builtin_function(&mut parser, "get", &["T"], |id| ( - vec![ - ("input", quick_type(&[PTV::Input, PTV::PolymorphicArgument(id.upcast(), 0)])) - ], - quick_type(&[PTV::PolymorphicArgument(id.upcast(), 0)]) - )); - insert_builtin_function(&mut parser, "put", &["T"], |id| ( - vec![ - ("output", quick_type(&[PTV::Output, PTV::PolymorphicArgument(id.upcast(), 0)])), - ("value", quick_type(&[PTV::PolymorphicArgument(id.upcast(), 0)])), - ], - quick_type(&[PTV::Void]) - )); - insert_builtin_function(&mut parser, "fires", &["T"], |id| ( - vec![ - ("port", quick_type(&[PTV::InputOrOutput, PTV::PolymorphicArgument(id.upcast(), 0)])) - ], - quick_type(&[PTV::Bool]) - )); - insert_builtin_function(&mut parser, "create", &["T"], |id| ( - vec![ - ("length", quick_type(&[PTV::IntegerLike])) - ], - quick_type(&[PTV::ArrayLike, PTV::PolymorphicArgument(id.upcast(), 0)]) - )); - insert_builtin_function(&mut parser, "length", &["T"], |id| ( - vec![ - ("array", quick_type(&[PTV::ArrayLike, PTV::PolymorphicArgument(id.upcast(), 0)])) - ], - quick_type(&[PTV::UInt32]) // TODO: @PtrInt - )); - insert_builtin_function(&mut parser, "assert", &[], |_id| ( - vec![ - ("condition", quick_type(&[PTV::Bool])), - ], - quick_type(&[PTV::Void]) - )); - insert_builtin_function(&mut parser, "print", &[], |_id| ( - vec![ - ("message", quick_type(&[PTV::String])), - ], - quick_type(&[PTV::Void]) - )); - - parser + return Ok(parser) } - pub fn feed(&mut self, mut source: InputSource) -> Result<(), ParseError> { - let mut token_buffer = TokenBuffer::new(); - self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?; - - let module = Module{ - source, - tokens: token_buffer, - root_id: RootId::new_invalid(), - name: None, - version: None, - phase: ModuleCompilationPhase::Tokenized, - }; - self.modules.push(module); - - Ok(()) + /// Feeds a new InputSource to the parser, which will tokenize it and store + /// it internally for later parsing (when all modules are present). Returns + /// the index of the new module. + pub fn feed(&mut self, mut source: InputSource) -> Result { + return self.feed_internal(source, false, false); } pub fn parse(&mut self) -> Result<(), ParseError> { @@ -270,6 +219,12 @@ impl Parser { self.pass_definitions.parse(&mut self.modules, module_idx, &mut pass_ctx)?; } + if let Some(filename) = &self.write_tokens_to { + let mut writer = TokenWriter::new(); + let mut file = std::fs::File::create(std::path::Path::new(filename)).unwrap(); + writer.write(&mut file, &self.modules); + } + // Add every known type to the type table self.type_table.build_base_types(&mut self.modules, &mut pass_ctx)?; @@ -337,6 +292,103 @@ impl Parser { Ok(()) } + + /// Tries to find the standard library and add the files for parsing. + fn feed_standard_library(&mut self) -> Result<(), String> { + use std::env; + use std::path::{Path, PathBuf}; + use std::fs; + + // Pair is (name, add_to_global_namespace) + const FILES: [(&'static str, bool); 3] = [ + ("std.global.pdl", true), + ("std.internet.pdl", false), + ("std.random.pdl", false), + ]; + + // Determine base directory + let (base_path, from_env) = if let Ok(path) = env::var(REOWOLF_PATH_ENV) { + // Path variable is set + (path, true) + } else { + let path = match self.std_lib_dir.take() { + Some(path) => path, + None => { + let mut path = String::with_capacity(REOWOLF_PATH_DIR.len() + 2); + path.push_str("./"); + path.push_str(REOWOLF_PATH_DIR); + path + } + }; + + (path, false) + }; + + // Make sure directory exists + let path = Path::new(&base_path); + if !path.exists() { + return Err(format!("std lib root directory '{}' does not exist", base_path)); + } + + // Try to load all standard library files. We might need a more unified + // way to do this in the future (i.e. a "std" package, containing all + // of the modules) + let mut file_path = PathBuf::new(); + let mut first_file = true; + + for (file, add_to_global_namespace) in FILES { + file_path.clear(); + file_path.push(path); + file_path.push(file); + + let source = fs::read(file_path.as_path()); + if let Err(err) = source { + return Err(format!( + "failed to read std lib file '{}' in root directory '{}', because: {}", + file, base_path, err + )); + } + + let source = source.unwrap(); + let input_source = InputSource::new(file.to_string(), source); + + let module_index = self.feed_internal(input_source, true, add_to_global_namespace); + if let Err(err) = module_index { + // A bit of a hack, but shouldn't really happen anyway: the + // compiler should ship with a decent standard library (at some + // point) + return Err(format!("{}", err)); + } + let module_index = module_index.unwrap(); + + if first_file { + self.global_module_index = module_index; + first_file = false; + } + } + + return Ok(()) + } + + fn feed_internal(&mut self, mut source: InputSource, is_compiler_file: bool, add_to_global_namespace: bool) -> Result { + let mut token_buffer = TokenBuffer::new(); + self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?; + + let module = Module{ + source, + tokens: token_buffer, + is_compiler_file, + add_to_global_namespace, + root_id: RootId::new_invalid(), + name: None, + version: None, + phase: ModuleCompilationPhase::Tokenized, + }; + let module_index = self.modules.len(); + self.modules.push(module); + + return Ok(module_index); + } } fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec, has_poly_var: bool, size: usize, alignment: usize) -> TypeId { @@ -353,79 +405,4 @@ fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec, }; return type_table.add_builtin_data_type(concrete_type, poly_var, size, alignment); -} - -// Note: args and return type need to be a function because we need to know the function ID. -fn insert_builtin_function (Vec<(&'static str, ParserType)>, ParserType)> ( - p: &mut Parser, func_name: &str, polymorphic: &[&str], arg_and_return_fn: T -) { - // Insert into AST (to get an ID), also prepare the polymorphic variables - // we need later for the type table - let mut ast_poly_vars = Vec::with_capacity(polymorphic.len()); - let mut type_poly_vars = Vec::with_capacity(polymorphic.len()); - for poly_var in polymorphic { - let identifier = Identifier{ span: InputSpan::new(), value: p.string_pool.intern(poly_var.as_bytes()) } ; - ast_poly_vars.push(identifier.clone()); - type_poly_vars.push(PolymorphicVariable{ identifier, is_in_use: false }); - } - - let func_ident_ref = p.string_pool.intern(func_name.as_bytes()); - let procedure_id = p.heap.alloc_procedure_definition(|this| ProcedureDefinition { - this, - defined_in: RootId::new_invalid(), - builtin: true, - kind: ProcedureKind::Function, - span: InputSpan::new(), - identifier: Identifier{ span: InputSpan::new(), value: func_ident_ref.clone() }, - poly_vars: ast_poly_vars, - return_type: None, - parameters: Vec::new(), - scope: ScopeId::new_invalid(), - body: BlockStatementId::new_invalid(), - monomorphs: Vec::new(), - }); - - // Modify AST with more information about the procedure - let (arguments, return_type) = arg_and_return_fn(procedure_id); - - let mut parameters = Vec::with_capacity(arguments.len()); - for (arg_name, arg_type) in arguments { - let identifier = Identifier{ span: InputSpan::new(), value: p.string_pool.intern(arg_name.as_bytes()) }; - let param_id = p.heap.alloc_variable(|this| Variable{ - this, - kind: VariableKind::Parameter, - parser_type: arg_type.clone(), - identifier, - relative_pos_in_parent: 0, - unique_id_in_scope: 0 - }); - parameters.push(param_id); - } - - let func = &mut p.heap[procedure_id]; - func.parameters = parameters; - func.return_type = Some(return_type); - - // Insert into symbol table - p.symbol_table.insert_symbol(SymbolScope::Global, Symbol{ - name: func_ident_ref, - variant: SymbolVariant::Definition(SymbolDefinition{ - defined_in_module: RootId::new_invalid(), - defined_in_scope: SymbolScope::Global, - definition_span: InputSpan::new(), - identifier_span: InputSpan::new(), - imported_at: None, - class: DefinitionClass::Function, - definition_id: procedure_id.upcast(), - }) - }).unwrap(); - - // Insert into type table - // let mut concrete_type = ConcreteType::default(); - // concrete_type.parts.push(ConcreteTypePart::Function(procedure_id, type_poly_vars.len() as u32)); - // - // for _ in 0..type_poly_vars.len() { - // concrete_type.parts.push(ConcreteTypePart::Void); // doesn't matter (I hope...) - // } - // p.type_table.add_builtin_procedure_type(concrete_type, &type_poly_vars); } \ No newline at end of file