Changeset - 393872d4a8f8
[Not reviewed]
0 1 0
MH - 4 years ago 2021-03-17 12:23:30
henger@cwi.nl
yet another bit of progress on type inference
1 file changed with 170 insertions and 44 deletions:
0 comments (0 inline, 0 general)
src/protocol/parser/type_resolver.rs
Show inline comments
 
use std::collections::{HashMap, VecDeque};
 
use std::{collections::{HashMap, VecDeque}, fmt::Display};
 

	
 
use crate::protocol::ast::*;
 
use crate::protocol::inputsource::*;
 
use super::type_table::*;
 
use super::symbol_table::*;
 
use super::visitor::{
 
    STMT_BUFFER_INIT_CAPACITY,
 
    EXPR_BUFFER_INIT_CAPACITY,
 
    Ctx,
 
    Visitor2,
 
    VisitorResult
 
};
 
@@ -112,32 +112,89 @@ impl InferenceType {
 
        debug_assert!(!self.parts.is_empty());
 
        if self.parts.len() != 1 { return false; }
 
        match self.parts[0] {
 
            IP::Unknown | IP::IntegerLike | IP::Byte | IP::Short | IP::Int | IP::Long => true,
 
            _ => false
 
        }
 
    }
 

	
 
    fn might_be_integer(&self) -> bool {
 
        // TODO: @float Once floats are implemented this is no longer true
 
        self.might_be_numeric()
 
    }
 
}
 

	
 
impl std::fmt::Display for InferenceType {
 
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 
        fn write_recursive(arg: )
 
    fn display_name(&self, heap: &Heap) -> String {
 
        use InferredPart as IP;
 

	
 
        fn write_recursive(v: &mut String, t: &InferenceType, h: &Heap, idx: &mut usize) {
 
            match &t.parts[*idx] {
 
                IP::Unknown => v.push_str("?"),
 
                IP::Void => v.push_str("void"),
 
                IP::IntegerLike => v.push_str("int?"),
 
                IP::Message => v.push_str("msg"),
 
                IP::Bool => v.push_str("bool"),
 
                IP::Byte => v.push_str("byte"),
 
                IP::Short => v.push_str("short"),
 
                IP::Int => v.push_str("int"),
 
                IP::Long => v.push_str("long"),
 
                IP::String => v.push_str("str"),
 
                IP::Array => {
 
                    *idx += 1;
 
                    write_recursive(v, t, h, idx);
 
                    v.push_str("[]");
 
                },
 
                IP::Slice => {
 
                    *idx += 1;
 
                    write_recursive(v, t, h, idx);
 
                    v.push_str("[..]")
 
                },
 
                IP::Input => {
 
                    *idx += 1;
 
                    v.push_str("in<");
 
                    write_recursive(v, t, h, idx);
 
                    v.push('>');
 
                },
 
                IP::Output => {
 
                    *idx += 1;
 
                    v.push_str("out<");
 
                    write_recursive(v, t, h, idx);
 
                    v.push('>');
 
                },
 
                IP::Instance(definition_id, num_sub) => {
 
                    let definition = &h[*definition_id];
 
                    v.push_str(&String::from_utf8_lossy(&definition.identifier().value));
 
                    if *num_sub > 0 {
 
                        v.push('<');
 
                        *idx += 1;
 
                        write_recursive(v, t, h, idx);
 
                        for sub_idx in 1..*num_sub {
 
                            *idx += 1;
 
                            v.push_str(", ");
 
                            write_recursive(v, t, h, idx);
 
                        }
 
                        v.push('>');
 
                    }
 
                },
 
            }
 
        }
 

	
 
        let mut buffer = String::with_capacity(self.parts.len() * 5);
 
        let mut idx = 0;
 
        write_recursive(&mut buffer, self, heap, &mut idx);
 

	
 
        buffer
 
    }
 
}
 

	
 
#[derive(PartialEq, Eq)]
 
enum InferenceResult {
 
    Neither,        // neither argument is clarified
 
    First,          // first argument is clarified using the second one
 
    Second,         // second argument is clarified using the first one
 
    Both,           // both arguments are clarified
 
    Incompatible,   // types are incompatible: programmer error
 
}
 

	
 
impl InferenceResult {
 
    fn modified_any(&self) -> bool {
 
        match self {
 
            InferenceResult::First | InferenceResult::Second | InferenceResult::Both => true,
 
@@ -148,103 +205,105 @@ impl InferenceResult {
 
        match self {
 
            InferenceResult::First | InferenceResult::Both => true,
 
            _ => false
 
        }
 
    }
 
}
 

	
 
// Attempts to infer types within two `InferenceType` instances. If they are
 
// compatible then the result indicates which of the arguments were modified.
 
// After successful inference the parts of the inferred type have equal length.
 
//
 
// Personal note: inference is not the copying of types: this algorithm must
 
// infer that `TypeOuter<TypeA, auto>` and `Struct<auto, TypeB>` resolves to
 
// infer that `TypeOuter<TypeA, auto>` and `TypeOuter<auto, TypeB>` resolves to
 
// `TypeOuter<TypeA, TypeB>`.
 
unsafe fn progress_inference_types(a: *mut InferenceType, b: *mut InferenceType) -> InferenceResult {
 
    let a = &mut *a;
 
    let b = &mut *b;
 
    debug_assert!(!a.parts.is_empty());
 
    debug_assert!(!b.parts.is_empty());
 

	
 
    // Iterate over the elements of both types. Each (partially) known type
 
    // element must be compatible. Unknown types will be replaced by their
 
    // concrete counterpart if possible.
 
    let mut modified_a = false;
 
    let mut modified_b = false;
 
    let mut iter_idx = 0;
 

	
 
    while iter_idx < a.parts.len() {
 
        let a_part = &a.parts[iter_idx];
 
        let b_part = &b.parts[iter_idx];
 

	
 
        if a_part == b_part {
 
            // Exact match
 
            iter_idx += 1;
 
            continue;
 
        }
 

	
 
        // Not an exact match, so deal with edge cases
 
        // - inference of integerlike types to conrete integers
 
        if a_part == InferredPart::IntegerLike && b_part.is_concrete_int() {
 
        if *a_part == InferredPart::IntegerLike && b_part.is_concrete_int() {
 
            modified_a = true;
 
            a.parts[iter_idx] = b_part.clone();
 
            iter_idx += 1;
 
            continue;
 
        }
 
        if b_part == InferredPart::IntegerLike && a_part.is_concrete_int() {
 
        if *b_part == InferredPart::IntegerLike && a_part.is_concrete_int() {
 
            modified_b = true;
 
            b.parts[iter_idx] = a_part.clone();
 
            iter_idx += 1;
 
            continue;
 
        }
 

	
 
        // - inference of unknown type
 
        if a_part == InferredPart::Unknown {
 
        if *a_part == InferredPart::Unknown {
 
            let end_idx = b.find_subtree_end_idx(iter_idx);
 
            a.parts[iter_idx] = b.parts[iter_idx].clone();
 
            for insert_idx in (iter_idx + 1)..end_idx {
 
                a.parts.insert(insert_idx, b.parts[insert_idx].clone());
 
            }
 

	
 
            modified_a = true;
 
            iter_idx = end_idx;
 
            continue;
 
        }
 

	
 
        if b_part == InferredPart::Unknown {
 
        if *b_part == InferredPart::Unknown {
 
            let end_idx = a.find_subtree_end_idx(iter_idx);
 
            b.parts[iter_idx] = a.parts[iter_idx].clone();
 
            for insert_idx in (iter_idx + 1)..end_idx {
 
                b.parts.insert(insert_idx, a.parts[insert_idx].clone());
 
            }
 

	
 
            modified_b = true;
 
            iter_idx = end_idx;
 
            continue;
 
        }
 

	
 
        // If here then we cannot match the two parts
 
        return InferenceResult::Incompatible;
 
    }
 

	
 
    // TODO: @performance, can be done inline
 
    a.done = true;
 
    for part in &a.parts {
 
        if part == InferredPart::Unknown {
 
        if *part == InferredPart::Unknown || *part == InferredPart::IntegerLike {
 
            a.done = false;
 
            break
 
        }
 
    }
 

	
 
    b.done = true;
 
    for part in &b.parts {
 
        if part == InferredPart::Unknown {
 
        if *part == InferredPart::Unknown || *part == InferredPart::IntegerLike {
 
            b.done = false;
 
            break;
 
        }
 
    }
 

	
 
    // If the inference parts are correctly constructed (>0 elements, proper
 
    // trees) then being here means that both types are not only of compatible
 
    // types, but MUST have the same length.
 
    debug_assert_eq!(a.parts.len(), b.parts.len());
 
    match (modified_a, modified_b) {
 
        (true, true) => InferenceResult::Both,
 
        (true, false) => InferenceResult::First,
 
@@ -438,25 +497,25 @@ impl Visitor2 for TypeResolvingVisitor {
 

	
 
    fn visit_return_stmt(&mut self, ctx: &mut Ctx, id: ReturnStatementId) -> VisitorResult {
 
        let return_stmt = &ctx.heap[id];
 
        let expr_id = return_stmt.expression;
 

	
 
        self.visit_expr(ctx, expr_id)
 
    }
 

	
 
    fn visit_assert_stmt(&mut self, ctx: &mut Ctx, id: AssertStatementId) -> VisitorResult {
 
        let assert_stmt = &ctx.heap[id];
 
        let test_expr_id = assert_stmt.expression;
 

	
 
        self.visit_expr(ctx, expr_id)
 
        self.visit_expr(ctx, test_expr_id)
 
    }
 

	
 
    fn visit_new_stmt(&mut self, ctx: &mut Ctx, id: NewStatementId) -> VisitorResult {
 
        let new_stmt = &ctx.heap[id];
 
        let call_expr_id = new_stmt.expression;
 

	
 
        self.visit_call_expr(ctx, call_expr_id)
 
    }
 

	
 
    fn visit_put_stmt(&mut self, ctx: &mut Ctx, id: PutStatementId) -> VisitorResult {
 
        let put_stmt = &ctx.heap[id];
 

	
 
@@ -472,158 +531,182 @@ impl Visitor2 for TypeResolvingVisitor {
 

	
 
    fn visit_expr_stmt(&mut self, ctx: &mut Ctx, id: ExpressionStatementId) -> VisitorResult {
 
        let expr_stmt = &ctx.heap[id];
 
        let subexpr_id = expr_stmt.expression;
 

	
 
        self.visit_expr(ctx, subexpr_id)
 
    }
 

	
 
    // Expressions
 

	
 
    fn visit_assignment_expr(&mut self, ctx: &mut Ctx, id: AssignmentExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        self.expr_types.insert(upcast_id, self.determine_initial_expr_inference_type(ctx, upcast_id));
 
        self.insert_initial_expr_inference_type(ctx, upcast_id);
 

	
 
        let assign_expr = &ctx.heap[id];
 
        let left_expr_id = assign_expr.left;
 
        let right_expr_id = assign_expr.right;
 

	
 
        self.visit_expr(ctx, left_expr_id)?;
 
        self.visit_expr(ctx, right_expr_id)?;
 

	
 
        // TODO: Initial progress?
 
    }
 

	
 
    fn visit_conditional_expr(&mut self, ctx: &mut Ctx, id: ConditionalExpressionId) -> VisitorResult {
 
        let upcast_id = id.upcast();
 
        self.expr_types.insert(upcast_id, self.determine_initial_expr_inference_type(ctx, upcast_id));
 
        self.insert_initial_expr_inference_type(ctx, upcast_id);
 

	
 
        let conditional_expr = &ctx.heap[id];
 
        let test_expr_id = conditional_expr.test;
 
        let true_expr_id = conditional_expr.true_expression;
 
        let false_expr_id = conditional_expr.false_expression;
 

	
 
        self.expr_types.insert(test_expr_id, InferenceType::new(true, vec![InferredPart::Bool]));
 
        self.visit_expr(ctx, test_expr_id)?;
 
        self.visit_expr(ctx, true_expr_id)?;
 
        self.visit_expr(ctx, false_expr_id)?;
 

	
 
        // TODO: Initial progress?
 
        Ok(())
 
    }
 
}
 

	
 
enum TypeClass {
 
    Numeric, // int and float
 
    Integer,
 
}
 

	
 
impl std::fmt::Display for TypeClass {
 
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 
        write!(f, "{}", match self {
 
            TypeClass::Numeric => "numeric",
 
            TypeClass::Integer => "integer"
 
        })
 
    }
 
}
 

	
 
macro_rules! debug_assert_expr_ids_unique_and_known {
 
    // Base case for a single expression ID
 
    ($id:ident) => {
 
    ($resolver:ident, $id:ident) => {
 
        if cfg!(debug_assertions) {
 
            self.expr_types.contains_key(&$id);
 
            $resolver.expr_types.contains_key(&$id);
 
        }
 
    };
 
    // Base case for two expression IDs
 
    ($id1:ident, $id2:ident) => {
 
    ($resolver:ident, $id1:ident, $id2:ident) => {
 
        debug_assert_ne!($id1, $id2);
 
        debug_assert_expr_id!($id1);
 
        debug_assert_expr_id!($id2);
 
        debug_assert_expr_ids_unique_and_known!($resolver, $id1);
 
        debug_assert_expr_ids_unique_and_known!($resolver, $id2);
 
    };
 
    // Generic case
 
    ($id1:ident, $id2:ident, $($tail:ident),+) => {
 
    ($resolver:ident, $id1:ident, $id2:ident, $($tail:ident),+) => {
 
        debug_assert_ne!($id1, $id2);
 
        debug_assert_expr_id!($id1);
 
        debug_assert_expr_id!($id2, $($tail),+);
 
        debug_assert_expr_ids_unique_and_known!($resolver, $id1);
 
        debug_assert_expr_ids_unique_and_known!($resolver, $id2, $($tail),+);
 
    };
 
}
 

	
 
enum TypeConstraintResult {
 
    Progress, // Success: Made progress in applying constraints
 
    NoProgess, // Success: But did not make any progress in applying constraints
 
    ErrExprType, // Error: Expression type did not match the argument(s) of the expression type
 
    ErrArgType, // Error: Expression argument types did not match
 
}
 

	
 
impl TypeResolvingVisitor {
 
    fn progress_assignment_expr(&mut self, ctx: &mut Ctx, id: AssignmentExpressionId) {
 
    fn progress_assignment_expr(&mut self, ctx: &mut Ctx, id: AssignmentExpressionId) -> Result<bool, ParseError2> {
 
        use AssignmentOperator as AO;
 

	
 
        // TODO: Assignable check
 
        let (type_class, arg1_expr_id, arg2_expr_id) = {
 
            let expr = &ctx.heap[id];
 
            let type_class = match expr.operation {
 
                AO::Set =>
 
                    None,
 
                AO::Multiplied | AO::Divided | AO::Added | AO::Subtracted =>
 
                    Some(TypeClass::Numeric),
 
                AO::Remained | AO::ShiftedLeft | AO::ShiftedRight |
 
                AO::BitwiseAnded | AO::BitwiseXored | AO::BitwiseOred =>
 
                    Some(TypeClass::Integer),
 
            };
 

	
 
            (type_class, expr.left, expr.right)
 
        };
 

	
 
        let upcast_id = id.upcast();
 
        self.apply_equal3_constraint(ctx, upcast_id, arg1_expr_id, arg2_expr_id);
 
        let made_progress = self.apply_equal3_constraint(ctx, upcast_id, arg1_expr_id, arg2_expr_id)?;
 

	
 
        if let Some(type_class) = type_class {
 
            self.expr_type_is_of_type_class(ctx, id.upcast(), type_class)?
 
        }
 

	
 
        Ok(made_progress)
 
    }
 

	
 
    /// Applies a type constraint that expects all three provided types to be
 
    /// equal. In case we can make progress in inferring the types then we
 
    /// attempt to do so. If the call is successful then the composition of all
 
    /// types is made equal.
 
    fn apply_equal3_constraint(
 
        &mut self, ctx: &mut Ctx, expr_id: ExpressionId,
 
        arg1_id: ExpressionId, arg2_id: ExpressionId
 
    ) -> Result<bool, ParseError2> {
 
        // Safety: all expression IDs are always distinct, and we do not modify
 
        //  the container
 
        debug_assert_expr_ids_unique_and_known!(id1, id2, id3);
 
        debug_assert_expr_ids_unique_and_known!(self, expr_id, arg1_id, arg2_id);
 
        let expr_type: *mut _ = self.expr_types.get_mut(&expr_id).unwrap();
 
        let arg1_type: *mut _ = self.expr_types.get_mut(&arg1_id).unwrap();
 
        let arg2_type: *mut _ = self.expr_types.get_mut(&arg2_id).unwrap();
 

	
 
        let expr_res = unsafe{ progress_inference_types(expr_type, arg1_type) };
 
        if expr_res == InferenceResult::Incompatible { return TypeConstraintResult::ErrExprType; }
 
        if expr_res == InferenceResult::Incompatible { 
 
            return Err(self.construct_expr_type_error(ctx, expr_id, arg1_id));
 
        }
 

	
 
        let args_res = unsafe{ progress_inference_types(arg1_type, arg2_type) };
 
        if args_res == InferenceResult::Incompatible { return TypeConstraintResult::ErrArgType; }
 

	
 
        // If all types are compatible, but the second call caused type2 to be
 
        // expanded, then we must also re-expand type1.
 
        if args_res.modified_lhs() {
 
            type1.parts.clear();
 
            type1.parts.extend(&type2.parts);
 
        if args_res == InferenceResult::Incompatible { 
 
            return Err(self.construct_arg_type_error(ctx, expr_id, arg1_id, arg2_id)); 
 
        }
 

	
 
        if expr_res.modified_any() || args_res.modified_any() {
 
            TypeConstraintResult::Progress
 
        } else {
 
            TypeConstraintResult::NoProgess
 
        // If all types are compatible, but the second call caused the arg1_type
 
        // to be expanded, then we must also assign this to expr_type.
 
        if args_res.modified_lhs() { 
 
            unsafe {
 
                (*expr_type).parts.clear();
 
                (*expr_type).parts.extend((*arg2_type).parts.iter());
 
            }
 
        }
 

	
 
        let made_progress = expr_res.modified_any() || args_res.modified_any();
 
        Ok(made_progress)
 
    }
 

	
 
    /// Applies a typeclass constraint: checks if the type is of a particular
 
    /// class or not
 
    fn expr_type_is_of_type_class(
 
        &mut self, ctx: &mut Ctx, expr_id: ExpressionId, type_class: TypeClass
 
    ) -> bool {
 
        debug_assert_expr_ids_unique_and_known!(expr_id);
 
    ) -> Result<(), ParseError2> {
 
        debug_assert_expr_ids_unique_and_known!(self, expr_id);
 
        let expr_type = self.expr_types.get(&expr_id).unwrap();
 

	
 
        match type_class {
 
        let is_ok = match type_class {
 
            TypeClass::Numeric => expr_type.might_be_numeric(),
 
            TypeClass::Integer => expr_type.might_be_integer()
 
        };
 

	
 
        if is_ok {
 
            Ok(())
 
        } else {
 
            Err(self.construct_type_class_error(ctx, expr_id, type_class))
 
        }
 
    }
 

	
 
    /// Determines the `InferenceType` for the expression based on the
 
    /// expression parent. Note that if the parent is another expression, we do
 
    /// not take special action, instead we let parent expressions fix the type
 
    /// of subexpressions before they have a chance to call this function.
 
    /// Hence: if the expression type is already set, this function doesn't do
 
    /// anything.
 
    fn insert_initial_expr_inference_type(
 
        &mut self, ctx: &mut Ctx, expr_id: ExpressionId
 
    ) {
 
@@ -759,26 +842,69 @@ impl TypeResolvingVisitor {
 
    /// "if statement" or a "logical not" always expecting a boolean)
 
    fn construct_expr_type_error(
 
        &self, ctx: &Ctx, expr_id: ExpressionId, arg_id: ExpressionId
 
    ) -> ParseError2 {
 
        // TODO: Expand and provide more meaningful information for humans
 
        let expr = &ctx.heap[expr_id];
 
        let arg_expr = &ctx.heap[arg_id];
 
        let expr_type = self.expr_types.get(&expr_id).unwrap();
 
        let arg_type = self.expr_types.get(&arg_id).unwrap();
 

	
 
        return ParseError2::new_error(
 
            &ctx.module.source, expr.position(),
 
            "Incompatible types: this expression expected a '{}'"
 
            &format!(
 
                "Incompatible types: this expression expected a '{}'", 
 
                expr_type.display_name(&ctx.heap)
 
            )
 
        ).with_postfixed_info(
 
            &ctx.module.source, arg_expr.position(),
 
            "But this expression yields a '{}'"
 
            &format!(
 
                "But this expression yields a '{}'",
 
                arg_type.display_name(&ctx.heap)
 
            )
 
        )
 
    }
 

	
 
    fn construct_arg_type_error(
 
        &self, ctx: &Ctx, expr_id: ExpressionId,
 
        arg1_id: ExpressionId, arg2_id: ExpressionId
 
    ) -> ParseError2 {
 
        // TODO: Expand and provide more meaningful information for humans
 
        let expr = &ctx.heap[expr_id];
 
        let arg1 = &ctx.heap[arg1_id];
 
        let arg2 = &ctx.heap[arg2_id];
 

	
 
        let arg1_type = self.expr_types.get(&arg1_id).unwrap();
 
        let arg2_type = self.expr_types.get(&arg2_id).unwrap();
 

	
 
        return ParseError2::new_error(
 
            &ctx.module.source, expr.position(),
 
            "Incompatible types: cannot apply this expression"
 
        ).with_postfixed_info(
 
            &ctx.module.source, arg1.position(),
 
            &format!(
 
                "Because this expression has type '{}'",
 
                arg1_type.display_name(&ctx.heap)
 
            )
 
        ).with_postfixed_info(
 
            &ctx.module.source, arg2.position(),
 
            &format!(
 
                "But this expression has type '{}'",
 
                arg2_type.display_name(&ctx.heap)
 
            )
 
        )
 
    }
 

	
 
    fn construct_type_class_error(
 
        &self, ctx: &Ctx, expr_id: ExpressionId, type_class: TypeClass
 
    ) -> ParseError2 {
 
        let expr = &ctx.heap[expr_id];
 
        let expr_type = self.expr_types.get(&expr_id).unwrap();
 

	
 
        return ParseError2::new_error(
 
            &ctx.module.source, expr.position(),
 
            &format!(
 
                "Incompatible types: got a '{}' but expected a {} type",
 
                expr_type.display_name(&ctx.heap), type_class
 
            )
 
        )
 
    }
 
}
 
\ No newline at end of file
0 comments (0 inline, 0 general)