diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 935ccdcc7d7b7b3e50bc31fbeb2cef2f96d7faff..e55bf6b31adb04d40e19891ed39a219762f616dd 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -1,3 +1,4 @@ +#[macro_use] mod visitor; pub(crate) mod symbol_table; pub(crate) mod type_table; pub(crate) mod tokens; @@ -8,8 +9,9 @@ pub(crate) mod pass_imports; pub(crate) mod pass_definitions; pub(crate) mod pass_definitions_types; pub(crate) mod pass_validation_linking; +pub(crate) mod pass_rewriting; pub(crate) mod pass_typing; -mod visitor; +pub(crate) mod pass_stack_size; use tokens::*; use crate::collections::*; @@ -20,13 +22,16 @@ use pass_imports::PassImport; use pass_definitions::PassDefinitions; use pass_validation_linking::PassValidationLinking; use pass_typing::{PassTyping, ResolveQueue}; +use pass_rewriting::PassRewriting; +use pass_stack_size::PassStackSize; use symbol_table::*; -use type_table::TypeTable; +use type_table::*; use crate::protocol::ast::*; use crate::protocol::input_source::*; use crate::protocol::ast_printer::ASTWriter; +use crate::protocol::parser::type_table::PolymorphicVariable; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum ModuleCompilationPhase { @@ -36,8 +41,10 @@ pub enum ModuleCompilationPhase { DefinitionsParsed, // produced the AST for the entire module TypesAddedToTable, // added all definitions to the type table ValidatedAndLinked, // AST is traversed and has linked the required AST nodes + Typed, // Type inference and checking has been performed + Rewritten, // Special AST nodes are rewritten into regular AST nodes // When we continue with the compiler: - // Typed, // Type inference and checking has been performed + // StackSize } pub struct Module { @@ -51,16 +58,50 @@ pub struct Module { pub phase: ModuleCompilationPhase, } -// TODO: This is kind of wrong. Because when we're producing bytecode we would -// like the bytecode itself to not have the notion of the size of a pointer -// type. But until I figure out what we do want I'll just set everything -// to a 64-bit architecture. pub struct TargetArch { - pub array_size_alignment: (usize, usize), - pub slice_size_alignment: (usize, usize), - pub string_size_alignment: (usize, usize), - pub port_size_alignment: (usize, usize), - pub pointer_size_alignment: (usize, usize), + pub void_type_id: TypeId, + pub message_type_id: TypeId, + pub bool_type_id: TypeId, + pub uint8_type_id: TypeId, + pub uint16_type_id: TypeId, + pub uint32_type_id: TypeId, + pub uint64_type_id: TypeId, + pub sint8_type_id: TypeId, + pub sint16_type_id: TypeId, + pub sint32_type_id: TypeId, + pub sint64_type_id: TypeId, + pub char_type_id: TypeId, + pub string_type_id: TypeId, + pub array_type_id: TypeId, + pub slice_type_id: TypeId, + pub input_type_id: TypeId, + pub output_type_id: TypeId, + pub pointer_type_id: TypeId, +} + +impl TargetArch { + fn new() -> Self { + return Self{ + void_type_id: TypeId::new_invalid(), + bool_type_id: TypeId::new_invalid(), + message_type_id: TypeId::new_invalid(), + uint8_type_id: TypeId::new_invalid(), + uint16_type_id: TypeId::new_invalid(), + uint32_type_id: TypeId::new_invalid(), + uint64_type_id: TypeId::new_invalid(), + sint8_type_id: TypeId::new_invalid(), + sint16_type_id: TypeId::new_invalid(), + sint32_type_id: TypeId::new_invalid(), + sint64_type_id: TypeId::new_invalid(), + char_type_id: TypeId::new_invalid(), + string_type_id: TypeId::new_invalid(), + array_type_id: TypeId::new_invalid(), + slice_type_id: TypeId::new_invalid(), + input_type_id: TypeId::new_invalid(), + output_type_id: TypeId::new_invalid(), + pointer_type_id: TypeId::new_invalid(), + } + } } pub struct PassCtx<'a> { @@ -85,6 +126,8 @@ pub struct Parser { pass_definitions: PassDefinitions, pass_validation: PassValidationLinking, pass_typing: PassTyping, + pass_rewriting: PassRewriting, + pass_stack_size: PassStackSize, // Compiler options pub write_ast_to: Option, pub(crate) arch: TargetArch, @@ -104,18 +147,36 @@ impl Parser { pass_definitions: PassDefinitions::new(), pass_validation: PassValidationLinking::new(), pass_typing: PassTyping::new(), + pass_rewriting: PassRewriting::new(), + pass_stack_size: PassStackSize::new(), write_ast_to: None, - arch: TargetArch { - array_size_alignment: (3*8, 8), // pointer, length, capacity - slice_size_alignment: (2*8, 8), // pointer, length - string_size_alignment: (3*8, 8), // pointer, length, capacity - port_size_alignment: (3*4, 4), // two u32s: connector + port ID - pointer_size_alignment: (8, 8), - } + arch: TargetArch::new(), }; parser.symbol_table.insert_scope(None, SymbolScope::Global); + // Insert builtin types + // TODO: At some point use correct values for size/alignment + parser.arch.void_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Void], false, 0, 1); + parser.arch.message_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Message], false, 24, 8); + parser.arch.bool_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Bool], false, 1, 1); + parser.arch.uint8_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt8], false, 1, 1); + parser.arch.uint16_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt16], false, 2, 2); + parser.arch.uint32_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt32], false, 4, 4); + parser.arch.uint64_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::UInt64], false, 8, 8); + parser.arch.sint8_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt8], false, 1, 1); + parser.arch.sint16_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt16], false, 2, 2); + parser.arch.sint32_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt32], false, 4, 4); + parser.arch.sint64_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt64], false, 8, 8); + parser.arch.char_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Character], false, 4, 4); + parser.arch.string_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::String], false, 24, 8); + parser.arch.array_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Array, ConcreteTypePart::Void], true, 24, 8); + parser.arch.slice_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Slice, ConcreteTypePart::Void], true, 16, 4); + parser.arch.input_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Input, ConcreteTypePart::Void], true, 8, 8); + parser.arch.output_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8); + parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8); + + // Insert builtin functions fn quick_type(variants: &[ParserTypeVariant]) -> ParserType { let mut t = ParserType{ elements: Vec::with_capacity(variants.len()), full_span: InputSpan::new() }; for variant in variants { @@ -237,10 +298,10 @@ impl Parser { types: &mut self.type_table, arch: &self.arch, }; - PassTyping::queue_module_definitions(&mut ctx, &mut queue); + self.pass_typing.queue_module_definitions(&mut ctx, &mut queue); }; while !queue.is_empty() { - let top = queue.pop().unwrap(); + let top = queue.pop_front().unwrap(); let mut ctx = visitor::Ctx{ heap: &mut self.heap, modules: &mut self.modules, @@ -252,6 +313,21 @@ impl Parser { self.pass_typing.handle_module_definition(&mut ctx, &mut queue, top)?; } + // Rewrite nodes in tree, then prepare for execution of code + for module_idx in 0..self.modules.len() { + self.modules[module_idx].phase = ModuleCompilationPhase::Typed; + let mut ctx = visitor::Ctx{ + heap: &mut self.heap, + modules: &mut self.modules, + module_idx, + symbols: &mut self.symbol_table, + types: &mut self.type_table, + arch: &self.arch, + }; + self.pass_rewriting.visit_module(&mut ctx)?; + self.pass_stack_size.visit_module(&mut ctx)?; + } + // Write out desired information if let Some(filename) = &self.write_ast_to { let mut writer = ASTWriter::new(); @@ -263,49 +339,74 @@ impl Parser { } } -// Note: args and return type need to be a function because we need to know the function ID. -fn insert_builtin_function (Vec<(&'static str, ParserType)>, ParserType)> ( - p: &mut Parser, func_name: &str, polymorphic: &[&str], arg_and_return_fn: T) { +fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec, has_poly_var: bool, size: usize, alignment: usize) -> TypeId { + const POLY_VARS: [PolymorphicVariable; 1] = [PolymorphicVariable{ + identifier: Identifier::new_empty(InputSpan::new()), + is_in_use: false, + }]; - let mut poly_vars = Vec::with_capacity(polymorphic.len()); + let concrete_type = ConcreteType{ parts }; + let poly_var = if has_poly_var { + POLY_VARS.as_slice() + } else { + &[] + }; + + return type_table.add_builtin_data_type(concrete_type, poly_var, size, alignment); +} + +// Note: args and return type need to be a function because we need to know the function ID. +fn insert_builtin_function (Vec<(&'static str, ParserType)>, ParserType)> ( + p: &mut Parser, func_name: &str, polymorphic: &[&str], arg_and_return_fn: T +) { + // Insert into AST (to get an ID), also prepare the polymorphic variables + // we need later for the type table + let mut ast_poly_vars = Vec::with_capacity(polymorphic.len()); + let mut type_poly_vars = Vec::with_capacity(polymorphic.len()); for poly_var in polymorphic { - poly_vars.push(Identifier{ span: InputSpan::new(), value: p.string_pool.intern(poly_var.as_bytes()) }); + let identifier = Identifier{ span: InputSpan::new(), value: p.string_pool.intern(poly_var.as_bytes()) } ; + ast_poly_vars.push(identifier.clone()); + type_poly_vars.push(PolymorphicVariable{ identifier, is_in_use: false }); } let func_ident_ref = p.string_pool.intern(func_name.as_bytes()); - let func_id = p.heap.alloc_function_definition(|this| FunctionDefinition{ + let procedure_id = p.heap.alloc_procedure_definition(|this| ProcedureDefinition { this, defined_in: RootId::new_invalid(), builtin: true, + kind: ProcedureKind::Function, span: InputSpan::new(), identifier: Identifier{ span: InputSpan::new(), value: func_ident_ref.clone() }, - poly_vars, - return_types: Vec::new(), + poly_vars: ast_poly_vars, + return_type: None, parameters: Vec::new(), + scope: ScopeId::new_invalid(), body: BlockStatementId::new_invalid(), - num_expressions_in_body: -1, + monomorphs: Vec::new(), }); - let (args, ret) = arg_and_return_fn(func_id); + // Modify AST with more information about the procedure + let (arguments, return_type) = arg_and_return_fn(procedure_id); - let mut parameters = Vec::with_capacity(args.len()); - for (arg_name, arg_type) in args { + let mut parameters = Vec::with_capacity(arguments.len()); + for (arg_name, arg_type) in arguments { let identifier = Identifier{ span: InputSpan::new(), value: p.string_pool.intern(arg_name.as_bytes()) }; let param_id = p.heap.alloc_variable(|this| Variable{ this, kind: VariableKind::Parameter, parser_type: arg_type.clone(), identifier, - relative_pos_in_block: 0, + relative_pos_in_parent: 0, unique_id_in_scope: 0 }); parameters.push(param_id); } - let func = &mut p.heap[func_id]; + let func = &mut p.heap[procedure_id]; func.parameters = parameters; - func.return_types.push(ret); + func.return_type = Some(return_type); + // Insert into symbol table p.symbol_table.insert_symbol(SymbolScope::Global, Symbol{ name: func_ident_ref, variant: SymbolVariant::Definition(SymbolDefinition{ @@ -315,7 +416,16 @@ fn insert_builtin_function (Vec<(&'static str, Pa identifier_span: InputSpan::new(), imported_at: None, class: DefinitionClass::Function, - definition_id: func_id.upcast(), + definition_id: procedure_id.upcast(), }) }).unwrap(); + + // Insert into type table + // let mut concrete_type = ConcreteType::default(); + // concrete_type.parts.push(ConcreteTypePart::Function(procedure_id, type_poly_vars.len() as u32)); + // + // for _ in 0..type_poly_vars.len() { + // concrete_type.parts.push(ConcreteTypePart::Void); // doesn't matter (I hope...) + // } + // p.type_table.add_builtin_procedure_type(concrete_type, &type_poly_vars); } \ No newline at end of file