From fc987660fdeec249016e88d84a05b8a59f204c69 2021-04-22 20:10:56 From: MH Date: 2021-04-22 20:10:56 Subject: [PATCH] WIP on compiler rearchitecting --- diff --git a/src/collections/mod.rs b/src/collections/mod.rs index b066ddec6c67ef24a2dbdd310c354577cadbf4e4..f5817538fd52a1d90cb624295b79e5866bb87993 100644 --- a/src/collections/mod.rs +++ b/src/collections/mod.rs @@ -1,3 +1,5 @@ mod string_pool; +mod scoped_buffer; -pub(crate) use string_pool::{StringPool, StringRef}; \ No newline at end of file +pub(crate) use string_pool::{StringPool, StringRef}; +pub(crate) use scoped_buffer::ScopedBuffer; \ No newline at end of file diff --git a/src/collections/scoped_buffer.rs b/src/collections/scoped_buffer.rs new file mode 100644 index 0000000000000000000000000000000000000000..d3eea0f78a8c985c0cab8b599d4d5f9eaa715f2e --- /dev/null +++ b/src/collections/scoped_buffer.rs @@ -0,0 +1,80 @@ +/// scoped_buffer.rs +/// +/// Solves the common pattern where we are performing some kind of recursive +/// pattern while using a temporary buffer. At the start, or during the +/// procedure, we push stuff into the buffer. At the end we take out what we +/// have put in. +/// +/// It is unsafe because we're using pointers to take care of borrowing rules. +/// The correctness of use is checked in debug mode. + +/// The buffer itself. This struct should be the shared buffer. The type `T` is +/// intentionally `Copy` such that it can be copied out and the underlying +/// container can be truncated. 
+pub(crate) struct ScopedBuffer { + pub inner: Vec, +} + +pub(crate) struct ScopedSection { + inner: *mut Vec, + start_size: u32, + #[cfg(debug_assertions)] cur_size: u32, +} + +impl ScopedBuffer { + pub(crate) fn new_reserved(capacity: usize) -> Self { + Self{ inner: Vec::with_capacity(capacity) } + } + + pub(crate) fn start_section(&mut self) -> ScopedSection { + let start_size = self.inner.len() as u32; + ScopedSection{ + inner: &mut self.inner, + start_size, + cur_size: start_size + } + } +} + +#[cfg(debug_assertions)] +impl Drop for ScopedBuffer { + fn drop(&mut self) { + // Make sure that everyone cleaned up the buffer neatly + debug_assert!(self.inner.is_empty(), "dropped non-empty scoped buffer"); + } +} + +impl ScopedSection { + #[inline] + pub(crate) fn push(&mut self, value: T) { + let vec = unsafe{&mut *self.inner}; + debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to push onto section, but size is larger than expected"); + vec.push(value); + if cfg!(debug_assertions) { self.cur_size += 1; } + } + + #[inline] + pub(crate) fn forget(self) { + let vec = unsafe{&mut *self.inner}; + debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to forget section, but size is larger than expected"); + vec.truncate(self.start_size as usize); + } + + #[inline] + pub(crate) fn into_vec(self) -> Vec { + let vec = unsafe{&mut *self.inner}; + debug_assert_eq!(vec.len(), self.cur_size as usize, "trying to turn section into vec, but size is larger than expected"); + let section = Vec::from(&vec[self.start_size as usize..]); + vec.truncate(self.start_size as usize); + section + } +} + +#[cfg(debug_assertions)] +impl Drop for ScopedSection { + fn drop(&mut self) { + // Make sure that the data was actually taken out of the scoped section + let vec = unsafe{&*self.inner}; + debug_assert_eq!(vec.len(), self.start_size as usize); + } +} \ No newline at end of file diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs index 
05d78f666bb6b336eee0c943a2bca7e9467d546e..4a1b4a0f12cb13cb1c2d1b01afebeea4e342142c 100644 --- a/src/protocol/ast.rs +++ b/src/protocol/ast.rs @@ -254,12 +254,6 @@ impl Root { } } -impl SyntaxElement for Root { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub enum Pragma { Version(PragmaVersion), @@ -318,15 +312,6 @@ impl Import { } } -impl SyntaxElement for Import { - fn position(&self) -> InputPosition { - match self { - Import::Module(m) => m.position, - Import::Symbols(m) => m.position - } - } -} - #[derive(Debug, Clone)] pub struct ImportModule { pub this: ImportId, @@ -600,44 +585,52 @@ impl<'a> NamespacedIdentifierIter<'a> { } } -/// TODO: @types Remove the Message -> Byte hack at some point... -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialOrd, Ord)] pub enum ParserTypeVariant { // Basic builtin Message, Bool, - UInt8, Uint16, UInt32, UInt64, + UInt8, UInt16, UInt32, UInt64, SInt8, SInt16, SInt32, SInt64, Character, String, // Literals (need to get concrete builtin type during typechecking) IntegerLiteral, Inferred, - // Complex builtins - Array(ParserTypeId), // array of a type - Input(ParserTypeId), // typed input endpoint of a channel - Output(ParserTypeId), // typed output endpoint of a channel - Symbolic(SymbolicParserType), // symbolic type (definition or polyarg) + // Builtins expecting one subsequent type + Array, + Input, + Output, + // User-defined types + PolymorphicArgument(DefinitionId, usize), // usize = index into polymorphic variables + Definition(DefinitionId, usize), // usize = number of following subtypes } impl ParserTypeVariant { - pub(crate) fn supports_polymorphic_args(&self) -> bool { - use ParserTypeVariant::*; + pub(crate) fn num_embedded(&self) -> usize { match self { - Message | Bool | Byte | Short | Int | Long | String | IntegerLiteral | Inferred => false, - _ => true + x if *x <= ParserTypeVariant::Inferred => 0, + x if *x <= ParserTypeVariant::Output => 1, + 
ParserTypeVariant::PolymorphicArgument(_, _) => 0, + ParserTypeVariant::Definition(_, num) => num, + _ => { debug_assert!(false); 0 }, } } } +pub struct ParserTypeElement { + // TODO: @cleanup, do we ever need the span of a user-defined type after + // constructing it? + pub full_span: InputSpan, // full span of type, including any polymorphic arguments + pub variant: ParserTypeVariant, +} + /// ParserType is a specification of a type during the parsing phase and initial /// linker/validator phase of the compilation process. These types may be /// (partially) inferred or represent literals (e.g. a integer whose bytesize is /// not yet determined). #[derive(Debug, Clone)] pub struct ParserType { - pub this: ParserTypeId, - pub pos: InputPosition, - pub variant: ParserTypeVariant, + pub elements: Vec } /// SymbolicParserType is the specification of a symbolic type. During the @@ -910,32 +903,17 @@ impl Variable { } } -impl SyntaxElement for Variable { - fn position(&self) -> InputPosition { - match self { - Variable::Parameter(decl) => decl.position(), - Variable::Local(decl) => decl.position(), - } - } -} - /// TODO: Remove distinction between parameter/local and add an enum to indicate /// the distinction between the two #[derive(Debug, Clone)] pub struct Parameter { pub this: ParameterId, - // Phase 1: parser - pub position: InputPosition, - pub parser_type: ParserTypeId, + // Phase 2: parser + pub span: InputSpan, + pub parser_type: ParserType, pub identifier: Identifier, } -impl SyntaxElement for Parameter { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct Local { pub this: LocalId, @@ -946,11 +924,6 @@ pub struct Local { // Phase 2: linker pub relative_pos_in_block: u32, } -impl SyntaxElement for Local { - fn position(&self) -> InputPosition { - self.position - } -} #[derive(Debug, Clone)] pub enum Definition { @@ -968,7 +941,13 @@ impl Definition { _ => false } } - pub fn as_struct(&self) -> &StructDefinition 
{ + pub(crate) fn as_struct(&self) -> &StructDefinition { + match self { + Definition::Struct(result) => result, + _ => panic!("Unable to cast 'Definition' to 'StructDefinition'"), + } + } + pub(crate) fn as_struct_mut(&mut self) -> &mut StructDefinition { match self { Definition::Struct(result) => result, _ => panic!("Unable to cast 'Definition' to 'StructDefinition'"), @@ -980,7 +959,13 @@ impl Definition { _ => false, } } - pub fn as_enum(&self) -> &EnumDefinition { + pub(crate) fn as_enum(&self) -> &EnumDefinition { + match self { + Definition::Enum(result) => result, + _ => panic!("Unable to cast 'Definition' to 'EnumDefinition'"), + } + } + pub(crate) fn as_enum_mut(&mut self) -> &mut EnumDefinition { match self { Definition::Enum(result) => result, _ => panic!("Unable to cast 'Definition' to 'EnumDefinition'"), @@ -992,19 +977,31 @@ impl Definition { _ => false, } } - pub fn as_union(&self) -> &UnionDefinition { + pub(crate) fn as_union(&self) -> &UnionDefinition { match self { Definition::Union(result) => result, _ => panic!("Unable to cast 'Definition' to 'UnionDefinition'"), } } + pub(crate) fn as_union_mut(&mut self) -> &mut UnionDefinition { + match self { + Definition::Union(result) => result, + _ => panic!("Unable to cast 'Definition' to 'UnionDefinition'"), + } + } pub fn is_component(&self) -> bool { match self { Definition::Component(_) => true, _ => false, } } - pub fn as_component(&self) -> &ComponentDefinition { + pub(crate) fn as_component(&self) -> &ComponentDefinition { + match self { + Definition::Component(result) => result, + _ => panic!("Unable to cast `Definition` to `Component`"), + } + } + pub(crate) fn as_component_mut(&mut self) -> &mut ComponentDefinition { match self { Definition::Component(result) => result, _ => panic!("Unable to cast `Definition` to `Component`"), @@ -1016,7 +1013,13 @@ impl Definition { _ => false, } } - pub fn as_function(&self) -> &FunctionDefinition { + pub(crate) fn as_function(&self) -> &FunctionDefinition 
{ + match self { + Definition::Function(result) => result, + _ => panic!("Unable to cast `Definition` to `Function`"), + } + } + pub(crate) fn as_function_mut(&mut self) -> &mut FunctionDefinition { match self { Definition::Function(result) => result, _ => panic!("Unable to cast `Definition` to `Function`"), @@ -1048,16 +1051,13 @@ impl Definition { _ => panic!("cannot retrieve body (for EnumDefinition or StructDefinition)") } } -} - -impl SyntaxElement for Definition { - fn position(&self) -> InputPosition { + pub fn poly_vars(&self) -> &Vec { match self { - Definition::Struct(def) => def.position, - Definition::Enum(def) => def.position, - Definition::Union(def) => def.position, - Definition::Component(def) => def.position(), - Definition::Function(def) => def.position(), + Definition::Struct(def) => &def.poly_vars, + Definition::Enum(def) => &def.poly_vars, + Definition::Union(def) => &def.poly_vars, + Definition::Component(def) => &def.poly_vars, + Definition::Function(def) => &def.poly_vars, } } } @@ -1079,23 +1079,25 @@ impl VariableScope for Definition { #[derive(Debug, Clone)] pub struct StructFieldDefinition { + pub span: InputSpan, pub field: Identifier, - pub parser_type: ParserTypeId, + pub parser_type: ParserType, } #[derive(Debug, Clone)] pub struct StructDefinition { pub this: StructDefinitionId, - // Phase 1: parser + // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, + // Phase 2: parsing pub fields: Vec } impl StructDefinition { - pub(crate) fn new_empty(this: StructDefinitionId, span: InputSpan, identifier: Identifier) -> Self { - Self{ this, span, identifier, poly_vars: Vec::new(), fields: Vec::new() } + pub(crate) fn new_empty(this: StructDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { + Self{ this, span, identifier, poly_vars, fields: Vec::new() } } } @@ -1107,7 +1109,6 @@ pub enum EnumVariantValue { #[derive(Debug, Clone)] pub struct EnumVariantDefinition { - pub 
position: InputPosition, pub identifier: Identifier, pub value: EnumVariantValue, } @@ -1115,28 +1116,29 @@ pub struct EnumVariantDefinition { #[derive(Debug, Clone)] pub struct EnumDefinition { pub this: EnumDefinitionId, - // Phase 1: parser + // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, + // Phase 2: parsing pub variants: Vec, } impl EnumDefinition { - pub(crate) fn new_empty(this: EnumDefinitionId, span: InputSpan, identifier: Identifier) -> Self { - Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() } + pub(crate) fn new_empty(this: EnumDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { + Self{ this, span, identifier, poly_vars, variants: Vec::new() } } } #[derive(Debug, Clone)] pub enum UnionVariantValue { None, - Embedded(Vec), + Embedded(Vec), } #[derive(Debug, Clone)] pub struct UnionVariantDefinition { - pub position: InputPosition, + pub span: InputSpan, pub identifier: Identifier, pub value: UnionVariantValue, } @@ -1144,16 +1146,17 @@ pub struct UnionVariantDefinition { #[derive(Debug, Clone)] pub struct UnionDefinition { pub this: UnionDefinitionId, - // Phase 1: parser + // Phase 1: symbol scanning pub span: InputSpan, pub identifier: Identifier, pub poly_vars: Vec, + // Phase 2: parsing pub variants: Vec, } impl UnionDefinition { - pub(crate) fn new_empty(this: UnionDefinitionId, span: InputSpan, identifier: Identifier) -> Self { - Self{ this, span, identifier, poly_vars: Vec::new(), variants: Vec::new() } + pub(crate) fn new_empty(this: UnionDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { + Self{ this, span, identifier, poly_vars, variants: Vec::new() } } } @@ -1166,62 +1169,50 @@ pub enum ComponentVariant { #[derive(Debug, Clone)] pub struct ComponentDefinition { pub this: ComponentDefinitionId, - // Phase 1: parser + // Phase 1: symbol scanning pub span: InputSpan, pub variant: ComponentVariant, pub identifier: 
Identifier, pub poly_vars: Vec, + // Phase 2: parsing pub parameters: Vec, pub body: StatementId, } impl ComponentDefinition { - pub(crate) fn new_empty(this: ComponentDefinitionId, span: InputSpan, variant: ComponentVariant, identifier: Identifier) -> Self { + pub(crate) fn new_empty(this: ComponentDefinitionId, span: InputSpan, variant: ComponentVariant, identifier: Identifier, poly_vars: Vec) -> Self { Self{ - this, span, variant, identifier, - poly_vars: Vec::new(), + this, span, variant, identifier, poly_vars, parameters: Vec::new(), body: StatementId::new_invalid() } } } -impl SyntaxElement for ComponentDefinition { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct FunctionDefinition { pub this: FunctionDefinitionId, - // Phase 1: parser + // Phase 1: symbol scanning pub span: InputSpan, - pub return_type: ParserTypeId, pub identifier: Identifier, pub poly_vars: Vec, + // Phase 2: parsing + pub return_types: Vec, pub parameters: Vec, pub body: StatementId, } impl FunctionDefinition { - pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier) -> Self { + pub(crate) fn new_empty(this: FunctionDefinitionId, span: InputSpan, identifier: Identifier, poly_vars: Vec) -> Self { Self { - this, span, identifier, + this, span, identifier, poly_vars, return_type: ParserTypeId::new_invalid(), - poly_vars: Vec::new(), parameters: Vec::new(), body: StatementId::new_invalid(), } } } -impl SyntaxElement for FunctionDefinition { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub enum Statement { Block(BlockStatement), @@ -1432,35 +1423,11 @@ impl Statement { } } -impl SyntaxElement for Statement { - fn position(&self) -> InputPosition { - match self { - Statement::Block(stmt) => stmt.position(), - Statement::Local(stmt) => stmt.position(), - Statement::Skip(stmt) => stmt.position(), - Statement::Labeled(stmt) => stmt.position(), - Statement::If(stmt) 
=> stmt.position(), - Statement::EndIf(stmt) => stmt.position(), - Statement::While(stmt) => stmt.position(), - Statement::EndWhile(stmt) => stmt.position(), - Statement::Break(stmt) => stmt.position(), - Statement::Continue(stmt) => stmt.position(), - Statement::Synchronous(stmt) => stmt.position(), - Statement::EndSynchronous(stmt) => stmt.position(), - Statement::Return(stmt) => stmt.position(), - Statement::Assert(stmt) => stmt.position(), - Statement::Goto(stmt) => stmt.position(), - Statement::New(stmt) => stmt.position(), - Statement::Expression(stmt) => stmt.position(), - } - } -} - #[derive(Debug, Clone)] pub struct BlockStatement { pub this: BlockStatementId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub statements: Vec, // Phase 2: linker pub parent_scope: Option, @@ -1498,12 +1465,6 @@ impl BlockStatement { } } -impl SyntaxElement for BlockStatement { - fn position(&self) -> InputPosition { - self.position - } -} - impl VariableScope for BlockStatement { fn parent_scope(&self, _h: &Heap) -> Option { self.parent_scope.clone() @@ -1552,15 +1513,6 @@ impl LocalStatement { } } -impl SyntaxElement for LocalStatement { - fn position(&self) -> InputPosition { - match self { - LocalStatement::Memory(stmt) => stmt.position(), - LocalStatement::Channel(stmt) => stmt.position(), - } - } -} - #[derive(Debug, Clone)] pub struct MemoryStatement { pub this: MemoryStatementId, @@ -1571,12 +1523,6 @@ pub struct MemoryStatement { pub next: Option, } -impl SyntaxElement for MemoryStatement { - fn position(&self) -> InputPosition { - self.position - } -} - /// ChannelStatement is the declaration of an input and output port associated /// with the same channel. Note that the polarity of the ports are from the /// point of view of the component. 
So an output port is something that a @@ -1594,12 +1540,6 @@ pub struct ChannelStatement { pub next: Option, } -impl SyntaxElement for ChannelStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct SkipStatement { pub this: SkipStatementId, @@ -1609,12 +1549,6 @@ pub struct SkipStatement { pub next: Option, } -impl SyntaxElement for SkipStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct LabeledStatement { pub this: LabeledStatementId, @@ -1627,12 +1561,6 @@ pub struct LabeledStatement { pub in_sync: Option, } -impl SyntaxElement for LabeledStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct IfStatement { pub this: IfStatementId, @@ -1645,12 +1573,6 @@ pub struct IfStatement { pub end_if: Option, } -impl SyntaxElement for IfStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct EndIfStatement { pub this: EndIfStatementId, @@ -1660,12 +1582,6 @@ pub struct EndIfStatement { pub next: Option, } -impl SyntaxElement for EndIfStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct WhileStatement { pub this: WhileStatementId, @@ -1678,12 +1594,6 @@ pub struct WhileStatement { pub in_sync: Option, } -impl SyntaxElement for WhileStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct EndWhileStatement { pub this: EndWhileStatementId, @@ -1693,12 +1603,6 @@ pub struct EndWhileStatement { pub next: Option, } -impl SyntaxElement for EndWhileStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct BreakStatement { pub this: BreakStatementId, @@ -1709,12 +1613,6 @@ pub struct BreakStatement { pub target: Option, } -impl SyntaxElement for BreakStatement { - fn position(&self) -> 
InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct ContinueStatement { pub this: ContinueStatementId, @@ -1725,12 +1623,6 @@ pub struct ContinueStatement { pub target: Option, } -impl SyntaxElement for ContinueStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct SynchronousStatement { pub this: SynchronousStatementId, @@ -1743,12 +1635,6 @@ pub struct SynchronousStatement { pub parent_scope: Option, } -impl SyntaxElement for SynchronousStatement { - fn position(&self) -> InputPosition { - self.position - } -} - impl VariableScope for SynchronousStatement { fn parent_scope(&self, _h: &Heap) -> Option { self.parent_scope.clone() @@ -1774,12 +1660,6 @@ pub struct EndSynchronousStatement { pub next: Option, } -impl SyntaxElement for EndSynchronousStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct ReturnStatement { pub this: ReturnStatementId, @@ -1788,12 +1668,6 @@ pub struct ReturnStatement { pub expression: ExpressionId, } -impl SyntaxElement for ReturnStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct AssertStatement { pub this: AssertStatementId, @@ -1804,12 +1678,6 @@ pub struct AssertStatement { pub next: Option, } -impl SyntaxElement for AssertStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct GotoStatement { pub this: GotoStatementId, @@ -1820,12 +1688,6 @@ pub struct GotoStatement { pub target: Option, } -impl SyntaxElement for GotoStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct NewStatement { pub this: NewStatementId, @@ -1836,12 +1698,6 @@ pub struct NewStatement { pub next: Option, } -impl SyntaxElement for NewStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct 
ExpressionStatement { pub this: ExpressionStatementId, @@ -1852,12 +1708,6 @@ pub struct ExpressionStatement { pub next: Option, } -impl SyntaxElement for ExpressionStatement { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum ExpressionParent { None, // only set during initial parsing @@ -2043,25 +1893,6 @@ impl Expression { } } -impl SyntaxElement for Expression { - fn position(&self) -> InputPosition { - match self { - Expression::Assignment(expr) => expr.position(), - Expression::Binding(expr) => expr.position, - Expression::Conditional(expr) => expr.position(), - Expression::Binary(expr) => expr.position(), - Expression::Unary(expr) => expr.position(), - Expression::Indexing(expr) => expr.position(), - Expression::Slicing(expr) => expr.position(), - Expression::Select(expr) => expr.position(), - Expression::Array(expr) => expr.position(), - Expression::Literal(expr) => expr.position(), - Expression::Call(expr) => expr.position(), - Expression::Variable(expr) => expr.position(), - } - } -} - #[derive(Debug, Clone)] pub enum AssignmentOperator { Set, @@ -2080,23 +1911,17 @@ pub enum AssignmentOperator { #[derive(Debug, Clone)] pub struct AssignmentExpression { pub this: AssignmentExpressionId, - // Phase 1: parser - pub position: InputPosition, + // Phase 2: parser + pub span: InputSpan, // of the operator pub left: ExpressionId, pub operation: AssignmentOperator, pub right: ExpressionId, - // Phase 2: linker + // Phase 3: linker pub parent: ExpressionParent, - // Phase 3: type checking + // Phase 4: type checking pub concrete_type: ConcreteType, } -impl SyntaxElement for AssignmentExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct BindingExpression { pub this: BindingExpressionId, @@ -2114,7 +1939,7 @@ pub struct BindingExpression { pub struct ConditionalExpression { pub this: ConditionalExpressionId, // Phase 1: parser - pub 
position: InputPosition, + pub span: InputSpan, // of question mark operator pub test: ExpressionId, pub true_expression: ExpressionId, pub false_expression: ExpressionId, @@ -2124,12 +1949,6 @@ pub struct ConditionalExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for ConditionalExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone, PartialEq, Eq)] pub enum BinaryOperator { Concatenate, @@ -2157,7 +1976,7 @@ pub enum BinaryOperator { pub struct BinaryExpression { pub this: BinaryExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the operator pub left: ExpressionId, pub operation: BinaryOperator, pub right: ExpressionId, @@ -2167,12 +1986,6 @@ pub struct BinaryExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for BinaryExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone, PartialEq, Eq)] pub enum UnaryOperation { Positive, @@ -2189,7 +2002,7 @@ pub enum UnaryOperation { pub struct UnaryExpression { pub this: UnaryExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the operator pub operation: UnaryOperation, pub expression: ExpressionId, // Phase 2: linker @@ -2198,17 +2011,11 @@ pub struct UnaryExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for UnaryExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct IndexingExpression { pub this: IndexingExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub subject: ExpressionId, pub index: ExpressionId, // Phase 2: linker @@ -2217,17 +2024,11 @@ pub struct IndexingExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for IndexingExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct SlicingExpression { pub this: SlicingExpressionId, 
// Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // from '[' to ']'; pub subject: ExpressionId, pub from_index: ExpressionId, pub to_index: ExpressionId, @@ -2237,17 +2038,11 @@ pub struct SlicingExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for SlicingExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct SelectExpression { pub this: SelectExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // of the '.' pub subject: ExpressionId, pub field: Field, // Phase 2: linker @@ -2256,17 +2051,11 @@ pub struct SelectExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for SelectExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub struct ArrayExpression { pub this: ArrayExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, // from the opening to closing delimiter pub elements: Vec, // Phase 2: linker pub parent: ExpressionParent, @@ -2274,12 +2063,6 @@ pub struct ArrayExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for ArrayExpression { - fn position(&self) -> InputPosition { - self.position - } -} - // TODO: @tokenizer Symbolic function calls are ambiguous with union literals // that accept embedded values (although the polymorphic arguments are placed // differently). 
To prevent double work we parse as CallExpression, and during @@ -2298,12 +2081,6 @@ pub struct CallExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for CallExpression { - fn position(&self) -> InputPosition { - self.position - } -} - #[derive(Debug, Clone)] pub enum Method { Get, @@ -2323,7 +2100,7 @@ pub struct MethodSymbolic { pub struct LiteralExpression { pub this: LiteralExpressionId, // Phase 1: parser - pub position: InputPosition, + pub span: InputSpan, pub value: Literal, // Phase 2: linker pub parent: ExpressionParent, @@ -2331,21 +2108,13 @@ pub struct LiteralExpression { pub concrete_type: ConcreteType, } -impl SyntaxElement for LiteralExpression { - fn position(&self) -> InputPosition { - self.position - } -} - -type LiteralCharacter = Vec; -type LiteralInteger = i64; // TODO: @int_literal - #[derive(Debug, Clone)] pub enum Literal { Null, // message True, False, - Character(LiteralCharacter), + Character(char), + String(StringRef<'static>), Integer(LiteralInteger), Struct(LiteralStruct), Enum(LiteralEnum), @@ -2386,6 +2155,12 @@ impl Literal { } } +#[derive(Debug, Clone)] +pub struct LiteralInteger { + pub(crate) unsigned_value: u64, + pub(crate) negated: bool, // for constant expression evaluation, TODO +} + #[derive(Debug, Clone)] pub struct LiteralStructField { // Phase 1: parser @@ -2405,10 +2180,6 @@ pub struct LiteralStruct { pub(crate) definition: Option } -// TODO: @tokenizer Enum literals are ambiguous with union literals that do not -// accept embedded values. To prevent double work for now we parse as a -// LiteralEnum, and during validation we may transform the expression into a -// union literal. 
#[derive(Debug, Clone)] pub struct LiteralEnum { // Phase 1: parser @@ -2441,10 +2212,4 @@ pub struct VariableExpression { pub parent: ExpressionParent, // Phase 3: type checking pub concrete_type: ConcreteType, -} - -impl SyntaxElement for VariableExpression { - fn position(&self) -> InputPosition { - self.position - } -} +} \ No newline at end of file diff --git a/src/protocol/inputsource.rs b/src/protocol/inputsource.rs index 1442d6452d2892b47b7b5d09b2f72b3bbcf9d591..2a12406576d403fccc1a83a713ac1af31e32dd04 100644 --- a/src/protocol/inputsource.rs +++ b/src/protocol/inputsource.rs @@ -178,10 +178,6 @@ impl fmt::Display for InputPosition { } } -pub trait SyntaxElement { - fn position(&self) -> InputPosition; -} - #[derive(Debug)] pub enum ParseErrorType { Info, diff --git a/src/protocol/parser/mod.rs b/src/protocol/parser/mod.rs index 4809745fac292421922b8a6c613c774c14c72fe1..b4255992c152a1ac0e99557098a35cd88b71e6cb 100644 --- a/src/protocol/parser/mod.rs +++ b/src/protocol/parser/mod.rs @@ -29,6 +29,10 @@ use crate::protocol::lexer::*; use std::collections::HashMap; use crate::protocol::ast_printer::ASTWriter; +pub(crate) const LIMIT_NUM_TYPE_NODES: usize = 64; +pub(crate) const LIMIT_NUM_POLY_VARS: usize = 64; +pub(crate) const LIMIT_NUM_PROC_ARGS: usize = 64; + #[derive(PartialEq, Eq)] pub enum ModuleCompilationPhase { Source, // only source is set diff --git a/src/protocol/parser/pass_definitions.rs b/src/protocol/parser/pass_definitions.rs index 3bbfe219acd57fc1d373aadbeca584564733403d..3b440d77749e648e983746e5b8501cc8566c708f 100644 --- a/src/protocol/parser/pass_definitions.rs +++ b/src/protocol/parser/pass_definitions.rs @@ -3,13 +3,19 @@ use super::symbol_table2::*; use super::{Module, ModuleCompilationPhase, PassCtx}; use super::tokens::*; use super::token_parsing::*; -use crate::protocol::input_source2::{InputSource2 as InputSource, InputSpan, ParseError}; +use crate::protocol::input_source2::{InputSource2 as InputSource, InputPosition2 as 
InputPosition, InputSpan, ParseError}; use crate::collections::*; /// Parses all the tokenized definitions into actual AST nodes. pub(crate) struct PassDefinitions { + buffer: String, identifiers: Vec, struct_fields: Vec, + enum_variants: Vec, + union_variants: Vec, + parameters: Vec, + expressions: ScopedBuffer, + parser_types: Vec, } impl PassDefinitions { @@ -19,6 +25,8 @@ impl PassDefinitions { debug_assert_eq!(module.phase, ModuleCompilationPhase::ImportsResolved); debug_assert_eq!(module_range.range_kind, TokenRangeKind::Module); + // TODO: Very important to go through ALL ranges of the module so that we parse the entire + // input source. Only skip the ones we're certain we've handled before. let mut range_idx = module_range.first_child_idx; loop { let range_idx_usize = range_idx as usize; @@ -74,43 +82,673 @@ impl PassDefinitions { Ok(()) } + // TODO: @Cleanup, still not sure about polymorphic variable parsing. Pre-parsing the variables + // allows us to directly construct proper ParserType trees. But this does require two lookups + // of the corresponding definition. 
fn visit_struct_definition( &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx ) -> Result<(), ParseError> { - // Consume struct and name of struct - let struct_span = consume_exact_ident(&module.source, iter, b"struct"); + consume_exact_ident(&module.source, iter, KW_STRUCT)?; let (ident_text, _) = consume_ident(&module.source, iter)?; - // We should have preallocated the definition in the heap, retrieve its identifier - let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(SymbolScope::Module(module.root_id), ident_text) + // Retrieve preallocated DefinitionId + let module_scope = SymbolScope::Module(module.root_id); + let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) .unwrap().variant.as_definition().definition_id; + let poly_vars = ctx.heap[definition_id].poly_vars(); - consume_polymorphic_vars(source, iter, ctx, &mut self.identifiers)?; + // Parse struct definition + consume_polymorphic_vars_spilled(source, iter)?; debug_assert!(self.struct_fields.is_empty()); consume_comma_separated( TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, |source, iter| { + let parser_type = consume_parser_type( + source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id, false + )?; let field = consume_ident_interned(source, iter, ctx)?; - - StructFieldDefinition{ field, parser_type } + Ok(StructFieldDefinition{ field, parser_type }) }, &mut self.struct_fields, "a struct field", "a list of struct fields" + )?; + + // Transfer to preallocated definition + let struct_def = ctx.heap[definition_id].as_struct_mut(); + struct_def.fields.clone_from(&self.struct_fields); + self.struct_fields.clear(); + + Ok(()) + } + + fn visit_enum_definition( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result<(), ParseError> { + consume_exact_ident(&module.source, iter, KW_ENUM)?; + let (ident_text, _) = consume_ident(&module.source, iter)?; + + // Retrieve 
preallocated DefinitionId + let module_scope = SymbolScope::Module(module.root_id); + let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) + .unwrap().variant.as_definition().definition_id; + let poly_vars = ctx.heap[definition_id].poly_vars(); + + // Parse enum definition + consume_polymorphic_vars_spilled(source, iter)?; + debug_assert!(self.enum_variants.is_empty()); + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, + |source, iter| { + let identifier = consume_ident_interned(source, iter, ctx)?; + let value = if iter.next() == Some(TokenKind::Equal) { + iter.consume(); + let (variant_number, _) = consume_integer_literal(source, iter, &mut self.buffer)?; + EnumVariantValue::Integer(variant_number as i64) // TODO: @int + } else { + EnumVariantValue::None + }; + Ok(EnumVariantDefinition{ identifier, value }) + }, + &mut self.enum_variants, "an enum variant", "a list of enum variants" + )?; + + // Transfer to definition + let enum_def = ctx.heap[definition_id].as_enum_mut(); + enum_def.variants.clone_from(&self.enum_variants); + self.enum_variants.clear(); + + Ok(()) + } + + fn visit_union_definition( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result<(), ParseError> { + consume_exact_ident(&module.source, iter, KW_UNION)?; + let (ident_text, _) = consume_ident(&module.source, iter)?; + + // Retrieve preallocated DefinitionId + let module_scope = SymbolScope::Module(module.root_id); + let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text) + .unwrap().variant.as_definition().definition_id; + let poly_vars = ctx.heap[definition_id].poly_vars(); + + // Parse union definition + consume_polymorphic_vars_spilled(source, iter)?; + debug_assert!(self.union_variants.is_empty()); + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, source, iter, + |source, iter| { + let identifier = 
consume_ident_interned(source, iter, ctx)?;
+                // `close_pos` is handed out as `Some(&mut close_pos)` below, so it
+                // must be declared mutable (the original patch was missing `mut`).
+                let mut close_pos = identifier.span.end;
+                let has_embedded = maybe_consume_comma_separated(
+                    TokenKind::OpenParen, TokenKind::CloseParen, source, iter,
+                    |source, iter| {
+                        consume_parser_type(
+                            source, iter, &ctx.symbols, &ctx.heap, poly_vars,
+                            module_scope, definition_id, false
+                        )
+                    },
+                    &mut self.parser_types, "an embedded type", Some(&mut close_pos)
+                )?;
+                let value = if has_embedded {
+                    UnionVariantValue::Embedded(self.parser_types.clone())
+                } else {
+                    UnionVariantValue::None
+                };
+                self.parser_types.clear();
+
+                Ok(UnionVariantDefinition{
+                    span: InputSpan::from_positions(identifier.span.begin, close_pos),
+                    identifier,
+                    value
+                })
+            },
+            &mut self.union_variants, "a union variant", "a list of union variants", None
+        )?;
+
+        // Transfer to AST
+        let union_def = ctx.heap[definition_id].as_union_mut();
+        union_def.variants.clone_from(&self.union_variants);
+        self.union_variants.clear();
+
+        Ok(())
+    }
+
+    fn visit_function_definition(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result<(), ParseError> {
+        consume_exact_ident(&module.source, iter, KW_FUNCTION)?;
+        let (ident_text, _) = consume_ident(&module.source, iter)?;
+
+        // Retrieve preallocated DefinitionId
+        let module_scope = SymbolScope::Module(module.root_id);
+        let definition_id = ctx.symbols.get_symbol_by_name_defined_in_scope(module_scope, ident_text)
+            .unwrap().variant.as_definition().definition_id;
+        let poly_vars = ctx.heap[definition_id].poly_vars();
+
+        // Parse function's argument list
+        consume_parameter_list(
+            source, iter, ctx, &mut self.parameters, poly_vars, module_scope, definition_id
+        )?;
+        let parameters = self.parameters.clone();
+        self.parameters.clear();
+
+        // Consume return types
+        consume_comma_separated(
+            TokenKind::ArrowRight, TokenKind::OpenCurly, &module.source, iter,
+            |source, iter| {
+                consume_parser_type(source, iter, &ctx.symbols, &ctx.heap, poly_vars, module_scope, definition_id,
false) + }, + &mut self.parser_types, "a return type", "the return types", None + )?; + let return_types = self.parser_types.clone(); + self.parser_types.clear(); + + // Consume block + } + + fn consume_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let next = iter.next().expect("consume_statement has a next token"); + + if next == TokenKind::OpenCurly { + return self.consume_block_statement(module, iter, ctx)?.upcast(); + } else if next == TokenKind::Ident { + let (ident, _) = consume_any_ident(source, iter)?; + if ident == KW_STMT_IF { + return self.consume_if_statement(module, iter, ctx)?; + } else if ident == KW_STMT_WHILE { + return self.consume_while_statement(module, iter, ctx)?; + } else if ident == KW_STMT_BREAK { + return self.consume_break_statement(module, iter, ctx)?; + } else if ident == KW_STMT_CONTINUE { + return self.consume_continue_statement(module, iter, ctx)?; + } else if ident == KW_STMT_SYNC { + return self.consume_synchronous_statement(module, iter, ctx)?; + } else if ident == KW_STMT_RETURN { + return self.consume_return_statement(module, iter, ctx)?; + } else if ident == KW_STMT_ASSERT { + // TODO: Unify all builtin function calls as expressions + return self.consume_assert_statement(module, iter, ctx)?; + } else if ident == KW_STMT_GOTO { + return self.consume_goto_statement(module, iter, ctx)?; + } else if ident == KW_STMT_NEW { + return self.consume_new_statement(module, iter, ctx)?; + } else if iter.peek() == Some(TokenKind::Colon) { + return self.consume_labeled_statement(module, iter, ctx)?; + } + } + + // If here then attempt to parse as expression + return self.consume_expr_statement(module, iter, ctx)?; + } + + fn consume_block_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + let open_span = consume_token(source, iter, TokenKind::OpenCurly)?; + self.consume_block_statement_without_leading_curly(module, iter, ctx, open_span.begin) 
+ } + + fn consume_block_statement_without_leading_curly( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, open_curly_pos: InputPosition + ) -> Result { + let mut statements = Vec::new(); + let mut next = iter.next(); + while next.is_some() && next != Some(TokenKind::CloseCurly) { + + } + + let mut block_span = consume_token(&module.source, iter, TokenKind::CloseCurly)?; + block_span.begin = open_curly_pos; + + Ok(ctx.heap.alloc_block_statement(|this| BlockStatement{ + this, + span: block_span, + statements, + parent_scope: None, + relative_pos_in_parent: 0, + locals: Vec::new(), + labels: Vec::new(), + })) + } + + fn consume_if_statement( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + consume_exact_ident(&module.source, iter, KW_STMT_IF)?; + let test = consume_parenthesized_expression() + } + + //-------------------------------------------------------------------------- + // Expression Parsing + //-------------------------------------------------------------------------- + + fn consume_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_assignment_expression(module, iter, ctx) + } + + fn consume_assignment_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + // Utility to convert token into assignment operator + fn parse_assignment_operator(token: Option) -> Option { + use TokenKind as TK; + use AssignmentOperator as AO; + + if token.is_none() { + return None + } + + let matched = match token.unwrap() { + TK::Equal => Some(AO::Set), + TK::StarEquals => Some(AO::Multiplied), + TK::SlashEquals => Some(AO::Divided), + TK::PercentEquals => Some(AO::Remained), + TK::PlusEquals => Some(AO::Added), + TK::MinusEquals => Some(AO::Subtracted), + TK::ShiftLeftEquals => Some(AO::ShiftedLeft), + TK::ShiftRightEquals => Some(AO::ShiftedRight), + TK::AndEquals => Some(AO::BitwiseAnded), + TK::CaretEquals => 
Some(AO::BitwiseXored),
+                TK::OrEquals => Some(AO::BitwiseOred),
+                _ => None
+            };
+            // Return the parsed operator: the original patch bound `matched`
+            // but never yielded it, leaving the helper evaluating to `()`.
+            matched
+        }
+
+        let expr = self.consume_conditional_expression(module, iter, ctx)?;
+        if let Some(operation) = parse_assignment_operator(iter.next()) {
+            let span = iter.next_span();
+            iter.consume();
+
+            let left = expr;
+            let right = self.consume_expression(module, iter, ctx)?;
+
+            Ok(ctx.heap.alloc_assignment_expression(|this| AssignmentExpression{
+                this, span, left, operation, right,
+                parent: ExpressionParent::None,
+                concrete_type: ConcreteType::default(),
+            }).upcast())
+        } else {
+            Ok(expr)
+        }
+    }
+
+    fn consume_conditional_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        let result = self.consume_concat_expression(module, iter, ctx)?;
+        if let Some(TokenKind::Question) = iter.next() {
+            let span = iter.next_span();
+            iter.consume();
+
+            let test = result;
+            let true_expression = self.consume_expression(module, iter, ctx)?;
+            consume_token(source, iter, TokenKind::Colon)?;
+            let false_expression = self.consume_expression(module, iter, ctx)?;
+            Ok(ctx.heap.alloc_conditional_expression(|this| ConditionalExpression{
+                this, span, test, true_expression, false_expression,
+                parent: ExpressionParent::None,
+                concrete_type: ConcreteType::default(),
+            }).upcast())
+        } else {
+            Ok(result)
+        }
+    }
+
+    fn consume_concat_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::At) => Some(BinaryOperator::Concatenate),
+                _ => None
+            },
+            Self::consume_logical_or_expression
+        )
+    }
+
+    fn consume_logical_or_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::OrOr) => Some(BinaryOperator::LogicalOr),
+                _ => None
+            },
+            Self::consume_logical_and_expression
+ ) + } + + fn consume_logical_and_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_generic_binary_expression( + module, iter, ctx, + |token| match token { + Some(TokenKind::AndAnd) => Some(BinaryOperator::LogicalAnd), + _ => None + }, + Self::consume_bitwise_or_expression + ) + } + + fn consume_bitwise_or_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_generic_binary_expression( + module, iter, ctx, + |token| match token { + Some(TokenKind::Or) => Some(BinaryOperator::BitwiseOr), + _ => None + }, + Self::consume_bitwise_xor_expression + ) + } + + fn consume_bitwise_xor_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_generic_binary_expression( + module, iter, ctx, + |token| match token { + Some(TokenKind::Caret) => Some(BinaryOperator::BitwiseXor), + _ => None + }, + Self::consume_bitwise_and_expression ) } -} -enum TypeKind { - Message, - Bool, - UInt8, UInt16, UInt32, UInt64, - SInt8, SInt16, SInt32, SInt64, - Character, String, - Inferred, - Array, - Input, - Output, - SymbolicDefinition(DefinitionId), - SymbolicPolyArg(DefinitionId, usize), + fn consume_bitwise_and_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_generic_binary_expression( + module, iter, ctx, + |token| match token { + Some(TokenKind::And) => Some(BinaryOperator::BitwiseAnd), + _ => None + }, + Self::consume_equality_expression + ) + } + + fn consume_equality_expression( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx + ) -> Result { + self.consume_generic_binary_expression( + module, iter, ctx, + |token| match token { + Some(TokenKind::EqualEqual) => Some(BinaryOperator::Equality), + Some(TokenKind::NotEqual) => Some(BinaryOperator::Inequality), + _ => None + }, + Self::consume_relational_expression + ) + } + + fn 
consume_relational_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::OpenAngle) => Some(BinaryOperator::LessThan),
+                Some(TokenKind::CloseAngle) => Some(BinaryOperator::GreaterThan),
+                Some(TokenKind::LessEquals) => Some(BinaryOperator::LessThanEqual),
+                Some(TokenKind::GreaterEquals) => Some(BinaryOperator::GreaterThanEqual),
+                _ => None
+            },
+            Self::consume_shift_expression
+        )
+    }
+
+    fn consume_shift_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::ShiftLeft) => Some(BinaryOperator::ShiftLeft),
+                Some(TokenKind::ShiftRight) => Some(BinaryOperator::ShiftRight),
+                _ => None
+            },
+            Self::consume_add_or_subtract_expression
+        )
+    }
+
+    fn consume_add_or_subtract_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::Plus) => Some(BinaryOperator::Add),
+                Some(TokenKind::Minus) => Some(BinaryOperator::Subtract),
+                _ => None,
+            },
+            Self::consume_multiply_divide_or_modulus_expression
+        )
+    }
+
+    // NOTE(review): fixed typo `Tokeniter` -> `TokenIter` in the iterator parameter.
+    fn consume_multiply_divide_or_modulus_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        self.consume_generic_binary_expression(
+            module, iter, ctx,
+            |token| match token {
+                Some(TokenKind::Star) => Some(BinaryOperator::Multiply),
+                Some(TokenKind::Slash) => Some(BinaryOperator::Divide),
+                Some(TokenKind::Percent) => Some(BinaryOperator::Remainder),
+                _ => None
+            },
+            Self::consume_prefix_expression
+        )
+    }
+
+    fn consume_prefix_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        fn parse_prefix_token(token: Option) ->
Option<UnaryOperation> { // NOTE(review): was `Some(UnaryOperation)` — a value constructor, not a type
+            use TokenKind as TK;
+            use UnaryOperation as UO;
+            match token {
+                Some(TK::Plus) => Some(UO::Positive),
+                Some(TK::Minus) => Some(UO::Negative),
+                Some(TK::PlusPlus) => Some(UO::PreIncrement),
+                Some(TK::MinusMinus) => Some(UO::PreDecrement),
+                Some(TK::Tilde) => Some(UO::BitwiseNot),
+                Some(TK::Exclamation) => Some(UO::LogicalNot),
+                _ => None
+            }
+        }
+
+        if let Some(operation) = parse_prefix_token(iter.next()) {
+            let span = iter.next_span();
+            iter.consume();
+
+            let expression = self.consume_prefix_expression(module, iter, ctx)?;
+            Ok(ctx.heap.alloc_unary_expression(|this| UnaryExpression {
+                this, span, operation, expression,
+                parent: ExpressionParent::None,
+                concrete_type: ConcreteType::default()
+            }).upcast())
+        } else {
+            self.consume_postfix_expression(module, iter, ctx)
+        }
+    }
+
+    fn consume_postfix_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        fn has_matching_postfix_token(token: Option) -> bool {
+            use TokenKind as TK;
+
+            if token.is_none() { return false; }
+            match token.unwrap() {
+                TK::PlusPlus | TK::MinusMinus | TK::OpenSquare | TK::Dot => true,
+                _ => false
+            }
+        }
+
+        let mut result = self.consume_primary_expression(module, iter, ctx)?;
+        let mut next = iter.next();
+        while has_matching_postfix_token(next) {
+            let token = next.unwrap();
+            let mut span = iter.next_span();
+            iter.consume();
+
+            if token == TokenKind::PlusPlus {
+                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
+                    this, span,
+                    operation: UnaryOperation::PostIncrement,
+                    expression: result,
+                    parent: ExpressionParent::None,
+                    concrete_type: ConcreteType::default()
+                }).upcast();
+            } else if token == TokenKind::MinusMinus {
+                result = ctx.heap.alloc_unary_expression(|this| UnaryExpression{
+                    this, span,
+                    operation: UnaryOperation::PostDecrement,
+                    expression: result,
+                    parent: ExpressionParent::None,
+                    concrete_type: ConcreteType::default()
+                }).upcast();
+            } else if token ==
TokenKind::OpenSquare {
+                let subject = result;
+                let from_index = self.consume_expression(module, iter, ctx)?;
+
+                // Check if we have an indexing or slicing operation
+                next = iter.next();
+                // NOTE(review): original patch wrote `if Some(TokenKind::DotDot) = next`
+                // (missing `let`/comparison) — use equality tests on the Option.
+                if next == Some(TokenKind::DotDot) {
+                    iter.consume();
+
+                    let to_index = self.consume_expression(module, iter, ctx)?;
+                    let end_span = consume_token(&module.source, iter, TokenKind::CloseSquare)?;
+                    span.end = end_span.end;
+
+                    result = ctx.heap.alloc_slicing_expression(|this| SlicingExpression{
+                        this, span, subject, from_index, to_index,
+                        parent: ExpressionParent::None,
+                        concrete_type: ConcreteType::default()
+                    }).upcast();
+                } else if next == Some(TokenKind::CloseSquare) {
+                    let end_span = consume_token(&module.source, iter, TokenKind::CloseSquare)?;
+                    span.end = end_span.end;
+
+                    result = ctx.heap.alloc_indexing_expression(|this| IndexingExpression{
+                        this, span, subject,
+                        index: from_index,
+                        parent: ExpressionParent::None,
+                        concrete_type: ConcreteType::default()
+                    }).upcast();
+                } else {
+                    return Err(ParseError::new_error_str_at_pos(
+                        &module.source, iter.last_valid_pos(), "unexpected token: expected ']' or '..'"
+                    ));
+                }
+            } else {
+                debug_assert_eq!(token, TokenKind::Dot);
+                let subject = result;
+                let (field_text, field_span) = consume_ident(&module.source, iter)?;
+                let field = if field_text == b"length" {
+                    Field::Length
+                } else {
+                    let value = ctx.pool.intern(field_text);
+                    let identifier = Identifier{ value, span: field_span };
+                    // No trailing semicolon: this is the value of the `let field` else-branch
+                    // (the original patch's `;` made the branch evaluate to `()`).
+                    Field::Symbolic(FieldSymbolic{ identifier, definition: None, field_idx: 0 })
+                };
+
+                result = ctx.heap.alloc_select_expression(|this| SelectExpression{
+                    this, span, subject, field,
+                    parent: ExpressionParent::None,
+                    concrete_type: ConcreteType::default()
+                }).upcast();
+            }
+
+            next = iter.next();
+        }
+
+        Ok(result)
+    }
+
+    fn consume_primary_expression(
+        &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx
+    ) -> Result {
+        let next = iter.next();
+
+        let result;
+        if next == Some(TokenKind::OpenParen) {
+            //
Expression between parentheses + iter.consume(); + result = self.consume_expression(module, iter, ctx)?; + consume_token(&module.source, iter, TokenKind::CloseParen)?; + } else if next == Some(TokenKind::OpenCurly) { + // Array literal + let (start_pos, mut end_pos) = iter.next_positions(); + let mut expressions = Vec::new(); + consume_comma_separated( + TokenKind::OpenCurly, TokenKind::CloseCurly, &module.source, iter, + |source, iter| self.consume_expression(module, iter, ctx), + &mut expressions, "an expression", "a list of expressions", Some(&mut end_pos) + )?; + + // TODO: Turn into literal + result = ctx.heap.alloc_array_expression(|this| ArrayExpression{ + this, + span: InputSpan::from_positions(start_pos, end_pos), + elements: expressions, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast(); + } else if next == Some(TokenKind::Integer) { + let (literal, span) = consume_integer_literal(&module.source, iter, &mut self.buffer)?; + result = ctx.heap.alloc_literal_expression(|this| LiteralExpression{ + this, span, + value: Literal::Integer(LiteralInteger{ unsigned_value: literal, negated: false }), + parent: ExpressionParent::None, + concrete_type: ConcreteType::default(), + }).upcast(); + } else if next == Some(TokenKind::String) { + let (text, span) = consume_string_literal(&module.source, iter, &mut self.buffer)?; + } else if next == Some(TokenKind::Character) { + + } + + Ok(result) + } + + //-------------------------------------------------------------------------- + // Expression Utilities + //-------------------------------------------------------------------------- + + #[inline] + fn consume_generic_binary_expression< + M: Fn(Option) -> Option, + F: Fn(&mut PassDefinitions, &Module, &mut TokenIter, &mut PassCtx) -> Result + >( + &mut self, module: &Module, iter: &mut TokenIter, ctx: &mut PassCtx, match_fn: M, higher_precedence_fn: F + ) -> Result { + let mut result = higher_precedence_fn(self, module, iter, ctx)?; + 
while let Some(operation) = match_fn(iter.next()) { + let span = iter.next_span(); + iter.consume(); + + let left = result; + let right = higher_precedence_fn(self, module, iter, ctx)?; + + result = ctx.heap.alloc_binary_expression(|this| BinaryExpression{ + this, span, left, operation, right, + parent: ExpressionParent::None, + concrete_type: ConcreteType::default() + }).upcast(); + } + + Ok(result) + } } /// Consumes a type. A type always starts with an identifier which may indicate @@ -118,82 +756,367 @@ enum TypeKind { /// polymorphic arguments makes it a tree-like structure. Because we cannot rely /// on knowing the exact number of polymorphic arguments we do not check for /// these. +// TODO: @Optimize, and fix spans if needed fn consume_parser_type( - source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, poly_vars: &[Identifier] -) -> Result<(), ParseError> { - struct StackEntry { - angle_depth: i32, + source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], + cur_scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool +) -> Result { + struct Entry{ + element: ParserTypeElement, + depth: i32, } - let mut type_stack = Vec::new(); - Ok(()) + fn insert_array_before(elements: &mut Vec, depth: i32, span: InputSpan) { + let index = elements.iter().rposition(|e| e.depth == depth).unwrap(); + elements.insert(index, Entry{ + element: ParserTypeElement{ full_span: span, variant: ParserTypeVariant::Array }, + depth, + }); + } + + // Most common case we just have one type, perhaps with some array + // annotations. 
+ let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?; + if iter.next() != Some(TokenKind::OpenAngle) { + let mut num_array = 0; + while iter.next() == Some(TokenKind::OpenSquare) { + iter.consume(); + consume_token(source, iter, TokenKind::CloseSquare)?; + num_array += 1; + } + + let array_span = element.full_span; + let mut elements = Vec::with_capacity(num_array + 1); + for _ in 0..num_array { + elements.push(ParserTypeElement{ full_span: array_span, variant: ParserTypeVariant::Array }); + } + elements.push(element); + + return Ok(ParserType{ elements }); + }; + + // We have a polymorphic specification. So we start by pushing the item onto + // our stack, then start adding entries together with the angle-brace depth + // at which they're found. + let mut elements = Vec::new(); + elements.push(Entry{ element, depth: 0 }); + + // Start out with the first '<' consumed. + iter.consume(); + enum State { Ident, Open, Close, Comma }; + let mut state = State::Open; + let mut angle_depth = 1; + + loop { + let next = iter.next(); + + match state { + State::Ident => { + // Just parsed an identifier, may expect comma, angled braces, + // or the tokens indicating an array + if Some(TokenKind::OpenAngle) == next { + angle_depth += 1; + state = State::Open; + } else if Some(TokenKind::CloseAngle) == next { + angle_depth -= 1; + state = State::Close; + } else if Some(TokenKind::ShiftRight) == next { + angle_depth -= 2; + state = State::Close; + } else if Some(TokenKind::Comma) == next { + state = State::Comma; + } else if Some(TokenKind::OpenSquare) == next { + let (start_pos, _) = iter.next_positions(); + iter.consume(); // consume opening square + if iter.next() != Some(TokenKind::CloseSquare) { + return Err(ParseError::new_error_str_at_pos( + source, iter.last_valid_pos(), + "unexpected token: expected ']'" + )); + } + let (_, end_pos) = iter.next_positions(); + let array_span = 
InputSpan::from_positions(start_pos, end_pos); + insert_array_before(&mut elements, angle_depth, array_span); + } else { + return Err(ParseError::new_error_str_at_pos( + source, iter.last_valid_pos(), + "unexpected token: expected '<', '>', ',' or '['") + ); + } + + iter.consume(); + }, + State::Open => { + // Just parsed an opening angle bracket, expecting an identifier + let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?; + elements.push(Entry{ element, depth: angle_depth }); + state = State::Ident; + }, + State::Close => { + // Just parsed 1 or 2 closing angle brackets, expecting comma, + // more closing brackets or the tokens indicating an array + if Some(TokenKind::Comma) == next { + state = State::Comma; + } else if Some(TokenKind::CloseAngle) == next { + angle_depth -= 1; + state = State::Close; + } else if Some(TokenKind::ShiftRight) == next { + angle_depth -= 2; + state = State::Close; + } else if Some(TokenKind::OpenSquare) == next { + let (start_pos, _) = iter.next_positions(); + iter.consume(); + if iter.next() != Some(TokenKind::CloseSquare) { + return Err(ParseError::new_error_str_at_pos( + source, iter.last_valid_pos(), + "unexpected token: expected ']'" + )); + } + let (_, end_pos) = iter.next_positions(); + let array_span = InputSpan::from_positions(start_pos, end_pos); + insert_array_before(&mut elements, angle_depth, array_span); + } else { + return Err(ParseError::new_error_str_at_pos( + source, iter.last_valid_pos(), + "unexpected token: expected ',', '>', or '['") + ); + } + + iter.consume(); + }, + State::Comma => { + // Just parsed a comma, expecting an identifier or more closing + // braces + if Some(TokenKind::Ident) == next { + let element = consume_parser_type_ident(source, iter, symbols, heap, poly_vars, cur_scope, wrapping_definition, allow_inference)?; + elements.push(Entry{ element, depth: angle_depth }); + state = State::Ident; + } else if 
Some(TokenKind::CloseAngle) == next { + iter.consume(); + angle_depth -= 1; + state = State::Close; + } else if Some(TokenKind::ShiftRight) == next { + iter.consume(); + angle_depth -= 2; + state = State::Close; + } else { + return Err(ParseError::new_error_str_at_pos( + source, iter.last_valid_pos(), + "unexpected token: expected '>' or a type name" + )); + } + } + } + + if angle_depth < 0 { + return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "unmatched '>'")); + } else if angle_depth == 0 { + break; + } + } + + // If here then we found the correct number of angle braces. But we still + // need to make sure that each encountered type has the correct number of + // embedded types. + let mut idx = 0; + while idx < elements.len() { + let cur_element = &elements[idx]; + let expected_subtypes = cur_element.element.variant.num_embedded(); + let mut encountered_subtypes = 0; + for peek_idx in idx + 1..elements.len() { + let peek_element = &elements[peek_idx]; + if peek_element.depth == cur_element.depth + 1 { + encountered_subtypes += 1; + } else if peek_element.depth <= cur_element.depth { + break; + } + } + + if expected_subtypes != encountered_subtypes { + if encountered_subtypes == 0 { + // Case where we have elided the embedded types, all of them + // should be inferred. 
+ if !allow_inference { + return Err(ParseError::new_error_str_at_span( + source, cur_element.element.full_span, + "type inference is not allowed here" + )); + } + + // Insert the missing types + let inserted_span = cur_element.element.full_span; + let inserted_depth = cur_element.depth + 1; + elements.reserve(expected_subtypes); + for _ in 0..expected_subtypes { + elements.insert(idx + 1, Entry{ + element: ParserTypeElement{ full_span: inserted_span, variant: ParserTypeVariant::Inferred }, + depth: inserted_depth, + }); + } + } else { + // Mismatch in number of embedded types + let expected_args_text = if expected_subtypes == 1 { + "polymorphic argument" + } else { + "polymorphic arguments" + }; + + let maybe_infer_text = if allow_inference { + " (or none, to perform implicit type inference)" + } else { + "" + }; + + return Err(ParseError::new_error_at_span( + source, cur_element.element.full_span, + format!( + "expected {} {}{}, but {} were provided", + expected_subtypes, expected_args_text, maybe_infer_text, encountered_subtypes + ) + )); + } + } + + idx += 1; + } + + let mut constructed_elements = Vec::with_capacity(elements.len()); + for element in elements.into_iter() { + constructed_elements.push(element.element); + } + + Ok(ParserType{ elements: constructed_elements }) } fn consume_parser_type_ident( - source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, - mut scope: SymbolScope, wrapping_definition: DefinitionId, poly_vars: &[Identifier] -) -> Result<(TypeKind, InputSpan), ParseError> { - let (type_text, type_span) = consume_any_ident(source, iter)?; - - let type_kind = match type_text { - KW_TYPE_MESSAGE => TypeKind::Message, - KW_TYPE_BOOL => TypeKind::Bool, - KW_TYPE_UINT8 => TypeKind::UInt8, - KW_TYPE_UINT16 => TypeKind::UInt16, - KW_TYPE_UINT32 => TypeKind::UInt32, - KW_TYPE_UINT64 => TypeKind::UInt64, - KW_TYPE_SINT8 => TypeKind::SInt8, - KW_TYPE_SINT16 => TypeKind::SInt16, - KW_TYPE_SINT32 => TypeKind::SInt32, - KW_TYPE_SINT64 => 
TypeKind::SInt64, - KW_TYPE_IN_PORT => TypeKind::Input, - KW_TYPE_OUT_PORT => TypeKind::Output, + source: &InputSource, iter: &mut TokenIter, symbols: &SymbolTable, heap: &Heap, poly_vars: &[Identifier], + mut scope: SymbolScope, wrapping_definition: DefinitionId, allow_inference: bool, +) -> Result { + use ParserTypeVariant as PTV; + let (mut type_text, mut type_span) = consume_any_ident(source, iter)?; + + let variant = match type_text { + KW_TYPE_MESSAGE => PTV::Message, + KW_TYPE_BOOL => PTV::Bool, + KW_TYPE_UINT8 => PTV::UInt8, + KW_TYPE_UINT16 => PTV::UInt16, + KW_TYPE_UINT32 => PTV::UInt32, + KW_TYPE_UINT64 => PTV::UInt64, + KW_TYPE_SINT8 => PTV::SInt8, + KW_TYPE_SINT16 => PTV::SInt16, + KW_TYPE_SINT32 => PTV::SInt32, + KW_TYPE_SINT64 => PTV::SInt64, + KW_TYPE_IN_PORT => PTV::Input, + KW_TYPE_OUT_PORT => PTV::Output, + KW_TYPE_CHAR => PTV::Character, + KW_TYPE_STRING => PTV::String, + KW_TYPE_INFERRED => { + if !allow_inference { + return Err(ParseError::new_error_str_at_span(source, type_span, "type inference is not allowed here")); + } + + PTV::Inferred + }, _ => { // Must be some kind of symbolic type let mut type_kind = None; for (poly_idx, poly_var) in poly_vars.iter().enumerate() { if poly_var.value.as_bytes() == type_text { - type_kind = Some(TypeKind::SymbolicPolyArg(wrapping_definition, poly_idx)); + type_kind = Some(PTV::PolymorphicArgument(wrapping_definition, poly_idx)); } } if type_kind.is_none() { - // Check symbol table for definition - let last_symbol = ctx.symbols.get_symbol_by_name(scope, type_text); + // Check symbol table for definition. To be fair, the language + // only allows a single namespace for now. 
That said: + let last_symbol = symbols.get_symbol_by_name(scope, type_text); if last_symbol.is_none() { return Err(ParseError::new_error_str_at_span(source, type_span, "unknown type")); } - let last_symbol = last_symbol.unwrap(); - match last_symbol.variant { - SymbolVariant::Module(symbol_module) => { - // Keep seeking - }, - SymbolVariant::Definition(symbol_definition) => { + let mut last_symbol = last_symbol.unwrap(); + + loop { + match &last_symbol.variant { + SymbolVariant::Module(symbol_module) => { + // Expecting more identifiers + if Some(TokenKind::ColonColon) != iter.next() { + return Err(ParseError::new_error_str_at_span(source, type_span, "expected type but got module")); + } + consume_token(source, iter, TokenKind::ColonColon)?; + + // Consume next part of type and prepare for next + // lookup loop + let (next_text, next_span) = consume_any_ident(source, iter)?; + let old_text = type_text; + type_text = next_text; + type_span.end = next_span.end; + scope = SymbolScope::Module(symbol_module.root_id); + + let new_symbol = symbols.get_symbol_by_name_defined_in_scope(scope, type_text); + if new_symbol.is_none() { + return Err(ParseError::new_error_at_span( + source, next_span, + format!( + "unknown type '{}' in module '{}'", + String::from_utf8_lossy(type_text), + String::from_utf8_lossy(old_text) + ) + )); + } + + last_symbol = new_symbol.unwrap(); + }, + SymbolVariant::Definition(symbol_definition) => { + let num_poly_vars = heap[symbol_definition.definition_id].poly_vars().len(); + type_kind = Some(PTV::Definition(symbol_definition.definition_id, num_poly_vars)); + break; + } } } } - } - } + debug_assert!(type_kind.is_some()); + type_kind.unwrap() + }, + }; - Ok(()) + Ok(ParserTypeElement{ full_span: type_span, variant }) } -/// Consumes polymorphic variables (i.e. a list of identifiers). If the list is -/// absent then we simply return an empty array. 
-fn consume_polymorphic_vars( - source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut Vec -) -> Result<(), ParseError> { - // Note: because this is just a list of identifiers, we don't have to take - // two `TokenKind::CloseAngle` interpreted as `TokenKind::ShiftRight` into - // account. - debug_assert!(target.is_empty()); - maybe_consume_comma_separated( +/// Consumes polymorphic variables and throws them on the floor. +fn consume_polymorphic_vars_spilled(source: &InputSource, iter: &mut TokenIter) -> Result<(), ParseError> { + maybe_consume_comma_separated_spilled( TokenKind::OpenAngle, TokenKind::CloseAngle, source, iter, - |source, iter| consume_ident_interned(source, iter, ctx), - target, "a polymorphic variable" + |source, iter| { + consume_ident(source, iter)?; + Ok(()) + }, "a polymorphic variable" )?; Ok(()) +} + +/// Consumes the parameter list to functions/components +fn consume_parameter_list( + source: &InputSource, iter: &mut TokenIter, ctx: &mut PassCtx, target: &mut Vec, + poly_vars: &[Identifier], scope: SymbolScope, definition_id: DefinitionId +) -> Result<(), ParseError> { + consume_comma_separated( + TokenKind::OpenParen, TokenKind::CloseParen, source, iter, + |source, iter| { + let (start_pos, _) = iter.next_positions(); + let parser_type = consume_parser_type( + source, iter, &ctx.symbols, &ctx.heap, poly_vars, scope, definition_id, false + )?; + let identifier = consume_ident_interned(source, iter, ctx)?; + let parameter_id = ctx.heap.alloc_parameter(|this| Parameter{ + this, + span: InputSpan::from_positions(start_pos, identifier.span.end), + parser_type, + identifier + }); + Ok(parameter_id) + }, + target, "a parameter", "a parameter list", None + ) } \ No newline at end of file diff --git a/src/protocol/parser/pass_symbols.rs b/src/protocol/parser/pass_symbols.rs index e4acb219a132d1130240084feea0ddfc77ecee73..94ee54a7c0c228d208db3434750b56b7351c7f4b 100644 --- a/src/protocol/parser/pass_symbols.rs +++ 
b/src/protocol/parser/pass_symbols.rs @@ -181,6 +181,12 @@ impl PassSymbols { // Retrieve identifier of definition let identifier = consume_ident_interned(&module.source, &mut iter, ctx)?; + let mut poly_vars = Vec::new(); + maybe_consume_comma_separated( + TokenKind::OpenAngle, TokenKind::CloseAngle, &module.source, &mut iter, + |source, iter| consume_ident_interned(source, iter, ctx), + &mut poly_vars, "a polymorphic variable", None + )?; let ident_text = identifier.value.clone(); // because we need it later // Reserve space in AST for definition and add it to the symbol table @@ -189,28 +195,28 @@ match kw_text { KW_STRUCT => { let struct_def_id = ctx.heap.alloc_struct_definition(|this| { - StructDefinition::new_empty(this, definition_span, identifier) + StructDefinition::new_empty(this, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Struct; ast_definition_id = struct_def_id.upcast(); }, KW_ENUM => { let enum_def_id = ctx.heap.alloc_enum_definition(|this| { - EnumDefinition::new_empty(this, definition_span, identifier) + EnumDefinition::new_empty(this, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Enum; ast_definition_id = enum_def_id.upcast(); }, KW_UNION => { let union_def_id = ctx.heap.alloc_union_definition(|this| { - UnionDefinition::new_empty(this, definition_span, identifier) + UnionDefinition::new_empty(this, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Union; ast_definition_id = union_def_id.upcast() }, KW_FUNCTION => { let func_def_id = ctx.heap.alloc_function_definition(|this| { - FunctionDefinition::new_empty(this, definition_span, identifier) + FunctionDefinition::new_empty(this, definition_span, identifier, poly_vars) }); definition_class = DefinitionClass::Function; ast_definition_id = func_def_id.upcast(); @@ -222,7 +228,7 @@ ComponentVariant::Composite }; let comp_def_id = 
ctx.heap.alloc_component_definition(|this| { - ComponentDefinition::new_empty(this, definition_span, component_variant, identifier) + ComponentDefinition::new_empty(this, definition_span, component_variant, identifier, poly_vars) }); definition_class = DefinitionClass::Component; ast_definition_id = comp_def_id.upcast(); diff --git a/src/protocol/parser/pass_tokenizer.rs b/src/protocol/parser/pass_tokenizer.rs index f0a8c44190f142a38b65583fd1836c70c933ae90..288fb9ecc4b1e597a23440610a10b02f2d5dd940 100644 --- a/src/protocol/parser/pass_tokenizer.rs +++ b/src/protocol/parser/pass_tokenizer.rs @@ -5,7 +5,8 @@ use crate::protocol::input_source2::{ InputSpan }; -use crate::protocol::parser::tokens::*; +use super::tokens::*; +use super::token_parsing::*; /// Tokenizer is a reusable parser to tokenize multiple source files using the /// same allocated buffers. In a well-formed program, we produce a consistent @@ -27,8 +28,12 @@ pub(crate) struct PassTokenizer { impl PassTokenizer { pub(crate) fn new() -> Self { - Self{ curly_stack: Vec::with_capacity(32), stack_idx: 0 } + Self{ + curly_stack: Vec::with_capacity(32), + stack_idx: 0 + } } + pub(crate) fn tokenize(&mut self, source: &mut InputSource, target: &mut TokenBuffer) -> Result<(), ParseError> { // Assert source and buffer are at start debug_assert_eq!(source.pos().offset, 0); @@ -289,7 +294,8 @@ impl PassTokenizer { token_kind = TokenKind::SemiColon; } else if first_char == b'<' { source.consume(); - if let Some(b'<') = source.next() { + let next = source.next(); + if let Some(b'<') = next { source.consume(); if let Some(b'=') = source.next() { source.consume(); @@ -297,6 +303,9 @@ impl PassTokenizer { } else { token_kind = TokenKind::ShiftLeft; } + } else if let Some(b'=') = next { + source.consume(); + token_kind = TokenKind::LessEquals; } else { token_kind = TokenKind::OpenAngle; } @@ -310,7 +319,8 @@ impl PassTokenizer { } } else if first_char == b'>' { source.consume(); - if let Some(b'>') = source.next() { + 
let next = source.next(); + if let Some(b'>') = next { source.consume(); if let Some(b'=') = source.next() { source.consume(); @@ -318,6 +328,9 @@ } else { token_kind = TokenKind::ShiftRight; } + } else if let Some(b'=') = next { + source.consume(); + token_kind = TokenKind::GreaterEquals; + } else { token_kind = TokenKind::CloseAngle; } diff --git a/src/protocol/parser/token_parsing.rs b/src/protocol/parser/token_parsing.rs index bf7b4e734bde19af993f20eb553d449ac2b2584b..7681a235114006ab61b0e494a3bd7fbe4c0c8280 100644 --- a/src/protocol/parser/token_parsing.rs +++ b/src/protocol/parser/token_parsing.rs @@ -54,10 +54,10 @@ pub(crate) const KW_TYPE_UINT8: &'static [u8] = b"u8"; pub(crate) const KW_TYPE_UINT16: &'static [u8] = b"u16"; pub(crate) const KW_TYPE_UINT32: &'static [u8] = b"u32"; pub(crate) const KW_TYPE_UINT64: &'static [u8] = b"u64"; -pub(crate) const KW_TYPE_SINT8: &'static [u8] = b"s8"; -pub(crate) const KW_TYPE_SINT16: &'static [u8] = b"s16"; -pub(crate) const KW_TYPE_SINT32: &'static [u8] = b"s32"; -pub(crate) const KW_TYPE_SINT64: &'static [u8] = b"s64"; +pub(crate) const KW_TYPE_SINT8: &'static [u8] = b"s8"; +pub(crate) const KW_TYPE_SINT16: &'static [u8] = b"s16"; +pub(crate) const KW_TYPE_SINT32: &'static [u8] = b"s32"; +pub(crate) const KW_TYPE_SINT64: &'static [u8] = b"s64"; pub(crate) const KW_TYPE_CHAR: &'static [u8] = b"char"; pub(crate) const KW_TYPE_STRING: &'static [u8] = b"string"; pub(crate) const KW_TYPE_INFERRED: &'static [u8] = b"auto"; @@ -93,15 +93,16 @@ pub(crate) fn consume_domain_ident<'a>( /// Consumes a specific expected token. Be careful to only call this with tokens /// that do not have a variable length. 
-pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result<(), ParseError> { +pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected: TokenKind) -> Result { if Some(expected) != iter.next() { return Err(ParseError::new_error_at_pos( source, iter.last_valid_pos(), format!("expected '{}'", expected.token_chars()) )); } + let span = iter.next_span(); iter.consume(); - Ok(()) + Ok(span) } /// Consumes a comma-separated list of items if the opening delimiting token is @@ -113,7 +114,8 @@ pub(crate) fn consume_token(source: &InputSource, iter: &mut TokenIter, expected /// - Found an opening delimiter, but processing an item failed. pub(crate) fn maybe_consume_comma_separated( open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, - consumer_fn: F, target: &mut Vec, item_name_and_article: &'static str + consumer_fn: F, target: &mut Vec, item_name_and_article: &'static str, + close_pos: Option<&mut InputPosition> ) -> Result where F: Fn(&InputSource, &mut TokenIter) -> Result { @@ -129,9 +131,14 @@ pub(crate) fn maybe_consume_comma_separated( loop { next = iter.next(); if Some(close_delim) == next { + if let Some(close_pos) = close_pos { + // If requested return the position of the closing delimiter + let (_, new_close_pos) = iter.next_positions(); + *close_pos = new_close_pos; + } iter.consume(); break; - } else if !had_comma { + } else if !had_comma || next.is_none() { return Err(ParseError::new_error_at_pos( source, iter.last_valid_pos(), format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article) @@ -151,22 +158,59 @@ pub(crate) fn maybe_consume_comma_separated( Ok(true) } +pub(crate) fn maybe_consume_comma_separated_spilled Result<(), ParseError>>( + open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, + consumer_fn: F, item_name_and_article: &'static str +) -> Result { + let mut next = iter.next(); + 
if Some(open_delim) != next { + return Ok(false); + } + + iter.consume(); + let mut had_comma = true; + loop { + next = iter.next(); + if Some(close_delim) == next { + iter.consume(); + break; + } else if !had_comma { + return Err(ParseError::new_error_at_pos( + source, iter.last_valid_pos(), + format!("expected a '{}', or {}", close_delim.token_chars(), item_name_and_article) + )); + } + + consumer_fn(source, iter)?; + next = iter.next(); + had_comma = next == Some(TokenKind::Comma); + if had_comma { + iter.consume(); + } + } + + Ok(true) +} + /// Consumes a comma-separated list and expected the opening and closing /// characters to be present. The returned array may still be empty pub(crate) fn consume_comma_separated( open_delim: TokenKind, close_delim: TokenKind, source: &InputSource, iter: &mut TokenIter, consumer_fn: F, target: &mut Vec, item_name_and_article: &'static str, - list_name_and_article: &'static str + list_name_and_article: &'static str, close_pos: Option<&mut InputPosition> ) -> Result<(), ParseError> where F: Fn(&InputSource, &mut TokenIter) -> Result { let first_pos = iter.last_valid_pos(); - match maybe_consume_comma_separated(open_delim, close_delim, source, iter, consumer_fn, target, item_name_and_article) { + match maybe_consume_comma_separated( + open_delim, close_delim, source, iter, consumer_fn, target, + item_name_and_article, close_pos + ) { Ok(true) => Ok(()), Ok(false) => { return Err(ParseError::new_error_at_pos( source, first_pos, - format!("expected a {}", list_name_and_article) + format!("expected {}", list_name_and_article) )); }, Err(err) => Err(err) @@ -225,6 +269,24 @@ pub(crate) fn consume_integer_literal(source: &InputSource, iter: &mut TokenIter } } +/// Consumes a character literal. 
We currently support a limited number of +/// backslash-escaped characters +pub(crate) fn consume_character_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result { + if Some(TokenKind::Character) != iter.next() { + return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a character literal")); + } + let char_span = iter.next_span(); + iter.consume(); + + let char_text = source.section_at_span(char_span); + + // +} + +/// Consumes a string literal. We currently support a limited number of +/// backslash-escaped characters. +pub(crate) fn consume_string_literal(source: &InputSource, iter: &mut TokenIter, buffer: &mut String) -> Result<(> + pub(crate) fn consume_pragma<'a>(source: &'a InputSource, iter: &mut TokenIter) -> Result<(&'a [u8], InputPosition, InputPosition), ParseError> { if Some(TokenKind::Pragma) != iter.next() { return Err(ParseError::new_error_str_at_pos(source, iter.last_valid_pos(), "expected a pragma")); diff --git a/src/protocol/parser/tokens.rs b/src/protocol/parser/tokens.rs index 710548b32f8365e009f48646c294251faadbb1f5..57c8c5d8f2a1a1ebf42033a780eb73cb5bad06d4 100644 --- a/src/protocol/parser/tokens.rs +++ b/src/protocol/parser/tokens.rs @@ -66,7 +66,9 @@ pub(crate) enum TokenKind { EqualEqual, // == NotEqual, // != ShiftLeft, // << + LessEquals, // <= ShiftRight, // >> + GreaterEquals, // >= // Operator-like (three characters) ShiftLeftEquals,// <<= ShiftRightEquals, // >>= @@ -146,7 +148,9 @@ impl TokenKind { TK::EqualEqual => "==", TK::NotEqual => "!=", TK::ShiftLeft => "<<", + TK::LessEquals => "<=", TK::ShiftRight => ">>", + TK::GreaterEquals => ">=", TK::ShiftLeftEquals => "<<=", TK::ShiftRightEquals => ">>=", // Lets keep these in explicitly for now, in case we want to add more symbols @@ -252,7 +256,7 @@ impl<'a> TokenIter<'a> { /// Returns the next token (but skips over comments), or `None` if at the /// end of the range pub(crate) fn next(&mut self) -> Option { - while let 
Some(token_kind) = self.next() { + while let Some(token_kind) = self.next_including_comments() { if token_kind != TokenKind::LineComment && token_kind != TokenKind::BlockComment { return Some(token_kind); } @@ -262,6 +266,19 @@ impl<'a> TokenIter<'a> { return None } + /// Peeks ahead by one token (i.e. the one that comes after `next()`), and + /// skips over comments + pub(crate) fn peek(&self) -> Option { + for next_idx in self.cur + 1..self.end { + let next_kind = self.tokens[next_idx].kind; + if next_kind != TokenKind::LineComment && next_kind != TokenKind::BlockComment && next_kind != TokenKind::SpanEnd { + return Some(next_kind); + } + } + + return None; + } + /// Returns the start position belonging to the token returned by `next`. If /// there is not a next token, then we return the end position of the /// previous token.