Changeset - e1432caf0735
[Not reviewed]
1 6 1
MH - 3 years ago 2022-03-25 10:37:47
contact@maxhenger.nl
Replace reserved keywords with global std lib funcs
7 files changed with 47 insertions and 19 deletions:
0 comments (0 inline, 0 general)
src/protocol/ast_writer.rs
Show inline comments
 
file renamed from src/protocol/ast_printer.rs to src/protocol/ast_writer.rs
src/protocol/mod.rs
Show inline comments
 
mod arena;
 
pub(crate) mod eval;
 
pub(crate) mod input_source;
 
mod parser;
 
#[cfg(test)] mod tests;
 

	
 
pub(crate) mod ast;
 
pub(crate) mod ast_printer;
 
pub(crate) mod ast_writer;
 

	
 
use std::sync::Mutex;
 

	
 
use crate::collections::{StringPool, StringRef};
 
pub use crate::protocol::ast::*;
 
use crate::protocol::eval::*;
 
use crate::protocol::input_source::*;
 
use crate::protocol::parser::*;
 
use crate::protocol::type_table::*;
 

	
 
pub use parser::type_table::TypeId;
 

	
src/protocol/parser/mod.rs
Show inline comments
 
@@ -21,25 +21,25 @@ use pass_symbols::PassSymbols;
 
use pass_imports::PassImport;
 
use pass_definitions::PassDefinitions;
 
use pass_validation_linking::PassValidationLinking;
 
use pass_typing::{PassTyping, ResolveQueue};
 
use pass_rewriting::PassRewriting;
 
use pass_stack_size::PassStackSize;
 
use symbol_table::*;
 
use type_table::*;
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::input_source::*;
 

	
 
use crate::protocol::ast_printer::ASTWriter;
 
use crate::protocol::ast_writer::ASTWriter;
 
use crate::protocol::parser::type_table::PolymorphicVariable;
 

	
 
const REOWOLF_PATH_ENV: &'static str = "REOWOLF_ROOT"; // first lookup reowolf path
 
const REOWOLF_PATH_DIR: &'static str = "std"; // then try folder in current working directory
 

	
 
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
 
pub enum ModuleCompilationPhase {
 
    Tokenized,              // source is tokenized
 
    SymbolsScanned,         // all definitions are linked to their type class
 
    ImportsResolved,        // all imports are added to the symbol table
 
    DefinitionsParsed,      // produced the AST for the entire module
 
    TypesAddedToTable,      // added all definitions to the type table
 
@@ -112,47 +112,49 @@ pub struct PassCtx<'a> {
 
    symbols: &'a mut SymbolTable,
 
    pool: &'a mut StringPool,
 
    arch: &'a TargetArch,
 
}
 

	
 
pub struct Parser {
 
    // Storage of all information created/gathered during compilation.
 
    pub(crate) heap: Heap,
 
    pub(crate) string_pool: StringPool, // Do not deallocate, holds all strings
 
    pub(crate) modules: Vec<Module>,
 
    pub(crate) symbol_table: SymbolTable,
 
    pub(crate) type_table: TypeTable,
 
    pub(crate) global_module_index: usize, // contains globals, implicitly imported everywhere
 
    // Compiler passes, used as little state machine that keep their memory
 
    // around.
 
    pass_tokenizer: PassTokenizer,
 
    pass_symbols: PassSymbols,
 
    pass_import: PassImport,
 
    pass_definitions: PassDefinitions,
 
    pass_validation: PassValidationLinking,
 
    pass_typing: PassTyping,
 
    pass_rewriting: PassRewriting,
 
    pass_stack_size: PassStackSize,
 
    // Compiler options
 
    pub write_ast_to: Option<String>,
 
    pub(crate) arch: TargetArch,
 
}
 

	
 
impl Parser {
 
    pub fn new() -> Result<Self, String> {
 
        let mut parser = Parser{
 
            heap: Heap::new(),
 
            string_pool: StringPool::new(),
 
            modules: Vec::new(),
 
            symbol_table: SymbolTable::new(),
 
            type_table: TypeTable::new(),
 
            global_module_index: 0,
 
            pass_tokenizer: PassTokenizer::new(),
 
            pass_symbols: PassSymbols::new(),
 
            pass_import: PassImport::new(),
 
            pass_definitions: PassDefinitions::new(),
 
            pass_validation: PassValidationLinking::new(),
 
            pass_typing: PassTyping::new(),
 
            pass_rewriting: PassRewriting::new(),
 
            pass_stack_size: PassStackSize::new(),
 
            write_ast_to: None,
 
            arch: TargetArch::new(),
 
        };
 

	
 
@@ -170,44 +172,49 @@ impl Parser {
 
        parser.arch.sint8_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt8], false, 1, 1);
 
        parser.arch.sint16_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt16], false, 2, 2);
 
        parser.arch.sint32_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt32], false, 4, 4);
 
        parser.arch.sint64_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::SInt64], false, 8, 8);
 
        parser.arch.char_type_id    = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Character], false, 4, 4);
 
        parser.arch.string_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::String], false, 24, 8);
 
        parser.arch.array_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Array, ConcreteTypePart::Void], true, 24, 8);
 
        parser.arch.slice_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Slice, ConcreteTypePart::Void], true, 16, 4);
 
        parser.arch.input_type_id   = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Input, ConcreteTypePart::Void], true, 8, 8);
 
        parser.arch.output_type_id  = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Output, ConcreteTypePart::Void], true, 8, 8);
 
        parser.arch.pointer_type_id = insert_builtin_type(&mut parser.type_table, vec![ConcreteTypePart::Pointer, ConcreteTypePart::Void], true, 8, 8);
 

	
 
        // Parse standard library
 
        parser.feed_standard_library()?;
 

	
 
        return Ok(parser)
 
    }
 

	
 
    pub fn feed(&mut self, mut source: InputSource) -> Result<(), ParseError> {
 
    /// Feeds a new InputSource to the parser, which will tokenize it and store
 
    /// it internally for later parsing (when all modules are present). Returns
 
    /// the index of the new module.
 
    pub fn feed(&mut self, mut source: InputSource) -> Result<usize, ParseError> {
 
        let mut token_buffer = TokenBuffer::new();
 
        self.pass_tokenizer.tokenize(&mut source, &mut token_buffer)?;
 

	
 
        let module = Module{
 
            source,
 
            tokens: token_buffer,
 
            root_id: RootId::new_invalid(),
 
            name: None,
 
            version: None,
 
            phase: ModuleCompilationPhase::Tokenized,
 
        };
 
        let module_index = self.modules.len();
 
        self.modules.push(module);
 

	
 
        Ok(())
 
        return Ok(module_index);
 
    }
 

	
 
    pub fn parse(&mut self) -> Result<(), ParseError> {
 
        let mut pass_ctx = PassCtx{
 
            heap: &mut self.heap,
 
            symbols: &mut self.symbol_table,
 
            pool: &mut self.string_pool,
 
            arch: &self.arch,
 
        };
 

	
 
        // Advance all modules to the phase where all symbols are scanned
 
        for module_idx in 0..self.modules.len() {
 
@@ -311,45 +318,54 @@ impl Parser {
 
        };
 

	
 
        // Make sure directory exists
 
        let path = Path::new(&base_path);
 
        if !path.exists() {
 
            return Err(format!("std lib root directory '{}' does not exist", base_path));
 
        }
 

	
 
        // Try to load all standard library files. We might need a more unified
 
        // way to do this in the future (i.e. a "std" package, containing all
 
        // of the modules)
 
        let mut file_path = PathBuf::new();
 
        let mut first_file = true;
 

	
 
        for file in FILES {
 
            file_path.push(path);
 
            file_path.push(file);
 

	
 
            let source = fs::read(file_path.as_path());
 
            if let Err(err) = source {
 
                return Err(format!(
 
                    "failed to read std lib file '{}' in root directory '{}', because: {}",
 
                    file, base_path, err
 
                ));
 
            }
 

	
 
            let source = source.unwrap();
 
            let input_source = InputSource::new(file.to_string(), source);
 

	
 
            if let Err(err) = self.feed(input_source) {
 
            let module_index = self.feed(input_source);
 
            if let Err(err) = module_index {
 
                // A bit of a hack, but shouldn't really happen anyway: the
 
                // compiler should ship with a decent standard library (at some
 
                // point)
 
                return Err(format!("{}", err));
 
            }
 
            let module_index = module_index.unwrap();
 

	
 
            if first_file {
 
                self.global_module_index = module_index;
 
                first_file = false;
 
            }
 
        }
 

	
 
        return Ok(())
 
    }
 
}
 

	
 
fn insert_builtin_type(type_table: &mut TypeTable, parts: Vec<ConcreteTypePart>, has_poly_var: bool, size: usize, alignment: usize) -> TypeId {
 
    const POLY_VARS: [PolymorphicVariable; 1] = [PolymorphicVariable{
 
        identifier: Identifier::new_empty(InputSpan::new()),
 
        is_in_use: false,
 
    }];
 

	
src/protocol/parser/pass_definitions.rs
Show inline comments
 
@@ -246,24 +246,25 @@ impl PassDefinitions {
 
        let union_def = ctx.heap[definition_id].as_union_mut();
 
        union_def.variants = variants_section.into_vec();
 

	
 
        Ok(())
 
    }
 

	
 
    fn visit_function_definition(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
 
    ) -> Result<(), ParseError> {
 
        // Retrieve function name
 
        consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
 
        let (ident_text, _) = consume_ident(&module.source, iter)?;
 
        let stringy = String::from_utf8_lossy(ident_text).to_string();
 

	
 
        // Retrieve preallocated DefinitionId
 
        let module_scope = SymbolScope::Module(module.root_id);
 
        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
 
            .unwrap().variant.as_definition().definition_id;
 
        self.cur_definition = definition_id;
 

	
 
        consume_polymorphic_vars_spilled(&module.source, iter, ctx)?;
 

	
 
        // Parse function's argument list
 
        let mut parameter_section = self.variables.start_section();
 
        consume_parameter_list(
 
@@ -333,33 +334,41 @@ impl PassDefinitions {
 
        Ok(())
 
    }
 

	
 
    /// Consumes a procedure's body: either a user-defined procedure, which we
 
    /// parse as normal, or a builtin function, where we'll make sure we expect
 
    /// the particular builtin.
 
    ///
 
    /// We expect that the procedure's name is already stored in the
 
    /// preallocated AST node.
 
    fn consume_procedure_body(
 
        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, definition_id: DefinitionId, kind: ProcedureKind
 
    ) -> Result<(BlockStatementId, ProcedureSource), ParseError> {
 
        if iter.peek() == Some(TokenKind::Pragma) {
 
        if iter.next() == Some(TokenKind::OpenCurly) && iter.peek() == Some(TokenKind::Pragma) {
 
            // Consume the placeholder "{ #builtin }" tokens
 
            iter.consume(); // opening curly brace
 
            let (pragma, pragma_start, pragma_end) = consume_pragma(&module.source, iter)?;
 
            if pragma != b"#builtin" {
 
                return Err(ParseError::new_error_str_at_span(
 
                    &module.source, InputSpan::from_positions(pragma_start, pragma_end),
 
                    "expected a '#builtin' pragma, or a function body"
 
                ));
 
            }
 

	
 
            if iter.next() != Some(TokenKind::CloseCurly) {
 
                // Just to keep the compiler writers in line ;)
 
                panic!("compiler error: when using the #builtin pragma, wrap it in curly braces");
 
            }
 
            iter.consume();
 

	
 
            // Retrieve module and procedure name
 
            assert!(module.name.is_some(), "compiler error: builtin procedure body in unnamed module");
 
            let (_, module_name) = module.name.as_ref().unwrap();
 
            let module_name = module_name.as_str();
 

	
 
            let definition = ctx.heap[definition_id].as_procedure();
 
            let procedure_name = definition.identifier.value.as_str();
 

	
 
            let source = match (module_name, procedure_name) {
 
                ("std.global", "get") => ProcedureSource::FuncGet,
 
                ("std.global", "put") => ProcedureSource::FuncPut,
 
                ("std.global", "fires") => ProcedureSource::FuncFires,
src/protocol/parser/pass_symbols.rs
Show inline comments
 
@@ -186,24 +186,25 @@ impl PassSymbols {
 
        let range = &module.tokens.ranges[range_idx];
 
        let definition_span = InputSpan::from_positions(
 
            module.tokens.start_pos(range),
 
            module.tokens.end_pos(range)
 
        );
 
        let mut iter = module.tokens.iter_range(range);
 

	
 
        // First ident must be type of symbol
 
        let (kw_text, _) = consume_any_ident(&module.source, &mut iter).unwrap();
 

	
 
        // Retrieve identifier of definition
 
        let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?;
 
        println!("DEBUG: Parsing {} --- {}", String::from_utf8_lossy(kw_text).to_string(), identifier.value.as_str());
 
        let mut poly_vars = Vec::new();
 
        maybe_consume_comma_separated(
 
            TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, ctx,
 
            |source, iter, ctx| consume_ident_interned(source, iter, ctx),
 
            &mut poly_vars, "a polymorphic variable", None
 
        )?;
 
        let ident_text = identifier.value.clone(); // because we need it later
 
        let ident_span = identifier.span.clone();
 

	
 
        // Reserve space in AST for definition and add it to the symbol table
 
        let definition_class;
 
        let ast_definition_id;
src/protocol/parser/token_parsing.rs
Show inline comments
 
@@ -531,25 +531,26 @@ fn is_reserved_statement_keyword(text: &[u8]) -> bool {
 
        KW_IMPORT | KW_AS |
 
        KW_STMT_CHANNEL | KW_STMT_IF | KW_STMT_WHILE |
 
        KW_STMT_BREAK | KW_STMT_CONTINUE | KW_STMT_GOTO | KW_STMT_RETURN |
 
        KW_STMT_SYNC | KW_STMT_FORK | KW_STMT_NEW => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_expression_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_LET | KW_CAST |
 
        KW_LIT_TRUE | KW_LIT_FALSE | KW_LIT_NULL |
 
        KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH | KW_FUNC_PRINT => true,
 
        // TODO: Remove this once global namespace errors work @nocommit
 
        // KW_FUNC_GET | KW_FUNC_PUT | KW_FUNC_FIRES | KW_FUNC_CREATE | KW_FUNC_ASSERT | KW_FUNC_LENGTH | KW_FUNC_PRINT => true,
 
        _ => false,
 
    }
 
}
 

	
 
fn is_reserved_type_keyword(text: &[u8]) -> bool {
 
    match text {
 
        KW_TYPE_IN_PORT | KW_TYPE_OUT_PORT | KW_TYPE_MESSAGE | KW_TYPE_BOOL |
 
        KW_TYPE_UINT8 | KW_TYPE_UINT16 | KW_TYPE_UINT32 | KW_TYPE_UINT64 |
 
        KW_TYPE_SINT8 | KW_TYPE_SINT16 | KW_TYPE_SINT32 | KW_TYPE_SINT64 |
 
        KW_TYPE_CHAR | KW_TYPE_STRING |
 
        KW_TYPE_INFERRED => true,
 
        _ => false,
 
@@ -594,33 +595,34 @@ pub(crate) fn construct_symbol_conflict_error(
 
            },
 
            SymbolVariant::Definition(definition) => {
 
                if definition.defined_in_module.is_invalid() {
 
                    // Must be a builtin thing
 
                    return (format!("the builtin '{}'", symbol.name.as_str()), None)
 
                } else {
 
                    if let Some(import_id) = definition.imported_at {
 
                        let import = &ctx.heap[import_id];
 
                        return (
 
                            format!("the type '{}' imported here", symbol.name.as_str()),
 
                            Some(import.as_symbols().span)
 
                        );
 
                    } else {
 
                        // This is a defined symbol. So this must mean that the
 
                        // error was caused by it being defined.
 
                        debug_assert_eq!(definition.defined_in_module, module.root_id);
 

	
 
                    } else if definition.defined_in_module == module.root_id {
 
                        // This is a symbol defined in the same module
 
                        return (
 
                            format!("the type '{}' defined here", symbol.name.as_str()),
 
                            Some(definition.identifier_span)
 
                        )
 
                    } else {
 
                        // Not imported, not defined in the module, so must be
 
                        // a global
 
                        return (format!("the global '{}'", symbol.name.as_str()), None)
 
                    }
 
                }
 
            }
 
        }
 
    };
 

	
 
    let (new_symbol_msg, new_symbol_span) = get_symbol_span_and_msg(new_symbol);
 
    let (old_symbol_msg, old_symbol_span) = get_symbol_span_and_msg(old_symbol);
 
    let new_symbol_span = new_symbol_span.unwrap(); // because new symbols cannot be builtin
 

	
 
    match old_symbol_span {
 
        Some(old_symbol_span) => ParseError::new_error_at_span(
std/std.global.pdl
Show inline comments
 
#module std.global
 

	
 
func get<T>(in<T>) -> T #builtin
 
func put<T>(out<T>, T value) -> #type_void #builtin
 
func fires<T>(#type_portlike<T>) -> bool #builtin
 
func create<T>(#type_integerlike length) -> T[] #builtin
 
func length<T>(#type_arraylike<T> array) -> u32 #builtin
 
func assert(bool condition) -> #type_void #builtin
 
func print(string message) -> #type_void #builtin
 
\ No newline at end of file
 
func get<T>(in<T> input) -> T { #builtin }
 
func put<T>(out<T> output, T value) -> #type_void { #builtin }
 
func fires<T>(#type_portlike<T>) -> bool { #builtin }
 
func create<T>(#type_integerlike length) -> T[] { #builtin }
 
func length<T>(#type_arraylike<T> array) -> u32 { #builtin }
 
func assert(bool condition) -> #type_void { #builtin }
 
func print(string message) -> #type_void { #builtin }
 
\ No newline at end of file
0 comments (0 inline, 0 general)