From b07796bf6c5fa84fe8b75c82d6d4ebab648d8f23 2021-03-28 14:56:48
From: MH
Date: 2021-03-28 14:56:48
Subject: [PATCH] fix nested field access inference test

---

diff --git a/src/protocol/ast.rs b/src/protocol/ast.rs
index c73bcdf4b710390a3a21bc6c0202cf5dd089137f..829ec525d7d64a682ab17ae0f0ee5cafbbc07d14 100644
--- a/src/protocol/ast.rs
+++ b/src/protocol/ast.rs
@@ -2144,6 +2144,22 @@ impl Expression {
             Expression::Variable(expr) => expr.parent = parent,
         }
     }
+
+    pub fn get_type(&self) -> &ConcreteType {
+        match self {
+            Expression::Assignment(expr) => &expr.concrete_type,
+            Expression::Conditional(expr) => &expr.concrete_type,
+            Expression::Binary(expr) => &expr.concrete_type,
+            Expression::Unary(expr) => &expr.concrete_type,
+            Expression::Indexing(expr) => &expr.concrete_type,
+            Expression::Slicing(expr) => &expr.concrete_type,
+            Expression::Select(expr) => &expr.concrete_type,
+            Expression::Array(expr) => &expr.concrete_type,
+            Expression::Literal(expr) => &expr.concrete_type,
+            Expression::Call(expr) => &expr.concrete_type,
+            Expression::Variable(expr) => &expr.concrete_type,
+        }
+    }
 
     // TODO: @cleanup
     pub fn get_type_mut(&mut self) -> &mut ConcreteType {
         match self {
diff --git a/src/protocol/parser/type_resolver.rs b/src/protocol/parser/type_resolver.rs
index 8026f5a77aeaa37785acaf06035c479848cfd828..41107292a061244f03241d946db7858f34de9fa4 100644
--- a/src/protocol/parser/type_resolver.rs
+++ b/src/protocol/parser/type_resolver.rs
@@ -26,19 +26,23 @@
 /// references to polymorphic variables. Any later pass will perform just the
 /// type checking.
 ///
-/// TODO: Needs an optimization pass
-/// TODO: Needs a cleanup pass
-///   slightly more specific: initially it seemed that expressions just need
-///   to apply constraints between their expression types and the expression
-///   types of the arguments. But it seems to appear more-and-more that the
-///   expressions may need their own little datastructures. Using some kind of
-///   scratch allocator and proper considerations for dependencies might lead
-///   to a much more efficient algorithm.
-///   Also: Perhaps instead of this serialized tree with markers, we could
-///   investigate writing it as a graph, where ocurrences of polymorphic
-///   variables all point to the same type.
-/// TODO: Disallow `Void` types in various expressions (and other future types)
-/// TODO: Maybe remove msg type?
+/// TODO: Needs a thorough rewrite:
+///  1. For polymorphic type inference we need to have an extra datastructure
+///     for progressing the polymorphic variables and mapping them back to each
+///     signature type that uses that polymorphic type. The two types of markers
+///     became somewhat of a mess.
+///  2. We're doing a lot of extra work. It seems better to apply the initial
+///     type based on expression parents, then to apply forced constraints (an
+///     arg to a fires() call must be port-like), and only then to start
+///     progressing the types.
+///     Furthermore, queueing of expressions can be more intelligent: currently
+///     every child/parent of an expression is inferred again when queued. Hence
+///     we need to queue only specific children/parents of expressions.
+///  3. Remove the `msg` type?
+///  4. Disallow certain types in certain operations (e.g. `Void`).
+///  5. Implement implicit and explicit casting.
+///  6. Investigate different ways of performing the type-on-type inference;
+///     maybe there is a better way than flattened trees + markers?
 
 macro_rules! enabled_debug_print {
     (false, $name:literal, $format:literal) => {};
@@ -90,7 +94,6 @@ pub(crate) enum InferenceTypePart {
     // polymorphs: an expression may determine the polymorphs type, after we
    // need to apply that information to all other places where the polymorph is
    // used.
-    // TODO: @rename to something appropriate, I keep confusing myself...
     MarkerDefinition(usize), // marker for polymorph types on a procedure's definition
     MarkerBody(usize), // marker for polymorph types within a procedure body
     // Completely unknown type, needs to be inferred
@@ -373,21 +376,27 @@ impl InferenceType {
         // Inference of a completely unknown type
         if *to_infer_part == ITP::Unknown {
             // template part is different, so cannot be unknown, hence copy the
-            // entire subtree
+            // entire subtree. Make sure to copy definition markers, but not to
+            // transfer polymorph markers.
             let template_end_idx = Self::find_subtree_end_idx(template_parts, *template_idx);
-            let erase_offset = if let Some(marker) = template_definition_marker {
+            let template_start_idx = if let Some(marker) = template_definition_marker {
                 to_infer.parts[*to_infer_idx] = ITP::MarkerDefinition(marker);
-                *to_infer_idx += 1;
-                0
+                *template_idx
             } else {
-                1
+                to_infer.parts[*to_infer_idx] = template_parts[*template_idx].clone();
+                *template_idx + 1
             };
+            *to_infer_idx += 1;
 
-            to_infer.parts.splice(
-                *to_infer_idx..*to_infer_idx + erase_offset,
-                template_parts[*template_idx..template_end_idx].iter().cloned()
-            );
-            *to_infer_idx += template_end_idx - *template_idx;
+            for template_idx in template_start_idx..template_end_idx {
+                let template_part = &template_parts[template_idx];
+                if let ITP::MarkerBody(_) = template_part {
+                    // Do not copy this one
+                } else {
+                    to_infer.parts.insert(*to_infer_idx, template_part.clone());
+                    *to_infer_idx += 1;
+                }
+            }
 
             *template_idx = template_end_idx;
 
             // Note: by definition the LHS was Unknown and the RHS traversed a
@@ -647,8 +656,8 @@ impl InferenceType {
         }
     }
 
-    /// Writes a human-readable version of the type to a string. Mostly a
-    /// function for interior use.
+    /// Writes a human-readable version of the type to a string. This is used
+    /// to display error messages
     fn write_display_name(
         buffer: &mut String, heap: &Heap, parts: &[InferenceTypePart], mut idx: usize
     ) -> usize {
@@ -805,11 +814,12 @@ enum SingleInferenceResult {
 }
 
 enum DefinitionType{
-    None,
+    None, // Token value, never used during actual inference
     Component(ComponentId),
     Function(FunctionId),
 }
 
+#[derive(PartialEq, Eq)]
 pub(crate) struct ResolveQueueElement {
     pub(crate) root_id: RootId,
     pub(crate) definition_id: DefinitionId,
@@ -1299,8 +1309,18 @@ impl TypeResolvingVisitor {
             self.progress_expr(ctx, next_expr_id)?;
         }
 
-        // Should have inferred everything. Check for this and optionally
-        // auto-infer the remaining types
+        // We check if we have all the types we need. If we're typechecking a
+        // polymorphic procedure more than once, then we have already annotated
+        // the AST and have now performed typechecking for a different
+        // monomorph. In that case we just need to perform typechecking, no need
+        // to annotate the AST again.
+        let definition_id = match &self.definition_type {
+            DefinitionType::Component(id) => id.upcast(),
+            DefinitionType::Function(id) => id.upcast(),
+            _ => unreachable!(),
+        };
+
+        let already_checked = ctx.types.get_base_definition(&definition_id).unwrap().has_any_monomorph();
         for (expr_id, expr_type) in self.expr_types.iter_mut() {
             if !expr_type.is_done {
                 // Auto-infer numberlike/integerlike types to a regular int
@@ -1318,17 +1338,31 @@ impl TypeResolvingVisitor {
                 }
             }
 
-            let concrete_type = ctx.heap[*expr_id].get_type_mut();
-            expr_type.write_concrete_type(concrete_type);
+            if !already_checked {
+                let concrete_type = ctx.heap[*expr_id].get_type_mut();
+                expr_type.write_concrete_type(concrete_type);
+            } else {
+                if cfg!(debug_assertions) {
+                    let mut concrete_type = ConcreteType::default();
+                    expr_type.write_concrete_type(&mut concrete_type);
+                    debug_assert_eq!(*ctx.heap[*expr_id].get_type(), concrete_type);
+                }
+            }
         }
 
+        // All types are fine
+        ctx.types.add_monomorph(&definition_id, self.poly_vars.clone());
+
         // Check all things we need to monomorphize
         // TODO: Struct/enum/union monomorphization
         for (expr_id, extra_data) in self.extra_data.iter() {
             if extra_data.poly_vars.is_empty() { continue; }
 
             // Retrieve polymorph variable specification. Those of struct
-            // literals and those of procedure calls need to be fully inferred
+            // literals and those of procedure calls need to be fully inferred.
+            // The remaining ones (e.g. select expressions) allow partial
+            // inference of types, as long as the accessed field's type is
+            // fully inferred.
             let needs_full_inference = match &ctx.heap[*expr_id] {
                 Expression::Call(_) => true,
                 Expression::Literal(_) => true,
@@ -1371,12 +1405,15 @@ impl TypeResolvingVisitor {
 
                     // Pre-emptively add the monomorph to the type table, but
                     // we still need to perform typechecking on it
-                    ctx.types.add_monomorph(&definition_id, monomorph_types.clone());
-                    queue.push(ResolveQueueElement {
+                    // TODO: Unsure about this, performance-wise
+                    let queue_element = ResolveQueueElement{
                         root_id,
                         definition_id,
                         monomorph_types,
-                    })
+                    };
+                    if !queue.contains(&queue_element) {
+                        queue.push(queue_element);
+                    }
                 }
             }
         },
diff --git a/src/protocol/parser/type_table.rs b/src/protocol/parser/type_table.rs
index 3ffbec0d57854dd810667d2d30eb6f3c5ab44230..23fac82a8ef1b7bed1943c401650645b1cfbfe37 100644
--- a/src/protocol/parser/type_table.rs
+++ b/src/protocol/parser/type_table.rs
@@ -122,7 +122,11 @@ impl DefinedType {
         self.monomorphs.push(types);
     }
 
-    fn has_monomorph(&self, types: &Vec) -> bool {
+    pub(crate) fn has_any_monomorph(&self) -> bool {
+        !self.monomorphs.is_empty()
+    }
+
+    pub(crate) fn has_monomorph(&self, types: &Vec) -> bool {
         debug_assert_eq!(self.poly_args.len(), types.len(), "mismatch in number of polymorphic types");
         for monomorph in &self.monomorphs {
             if monomorph == types { return true; }
diff --git a/src/protocol/tests/parser_inference.rs b/src/protocol/tests/parser_inference.rs
index 59eaf4f5d9eab7803804a32ed07fbca10c89fc95..a296a14d08f9f4c4cd211f96293be3d0bfbc980c 100644
--- a/src/protocol/tests/parser_inference.rs
+++ b/src/protocol/tests/parser_inference.rs
@@ -69,71 +69,71 @@ fn test_integer_inference() {
 
 #[test]
 fn test_struct_inference() {
-    // Tester::new_single_source_expect_ok(
-    //     "by function calls",
-    //     "
-    //     struct Pair{ T1 first, T2 second }
-    //     Pair construct(T1 first, T2 second) {
-    //         return Pair{ first: first, second: second };
-    //     }
-    //     int fix_t1(Pair arg) { return 0; }
-    //     int fix_t2(Pair arg) { return 0; }
-    //     int test() {
-    //         auto first = 0;
-    //         auto second = 1;
-    //         auto pair = construct(first, second);
-    //         fix_t1(pair);
-    //         fix_t2(pair);
-    //         return 0;
-    //     }
-    //     "
-    // ).for_function("test", |f| { f
-    //     .for_variable("first", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("byte");
-    //     })
-    //     .for_variable("second", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("int");
-    //     })
-    //     .for_variable("pair", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("Pair");
-    //     });
-    // });
+    Tester::new_single_source_expect_ok(
+        "by function calls",
+        "
+        struct Pair{ T1 first, T2 second }
+        Pair construct(T1 first, T2 second) {
+            return Pair{ first: first, second: second };
+        }
+        int fix_t1(Pair arg) { return 0; }
+        int fix_t2(Pair arg) { return 0; }
+        int test() {
+            auto first = 0;
+            auto second = 1;
+            auto pair = construct(first, second);
+            fix_t1(pair);
+            fix_t2(pair);
+            return 0;
+        }
+        "
+    ).for_function("test", |f| { f
+        .for_variable("first", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("byte");
+        })
+        .for_variable("second", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("int");
+        })
+        .for_variable("pair", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("Pair");
+        });
+    });
 
-    // Tester::new_single_source_expect_ok(
-    //     "by field access",
-    //     "
-    //     struct Pair{ T1 first, T2 second }
-    //     Pair construct(T1 first, T2 second) {
-    //         return Pair{ first: first, second: second };
-    //     }
-    //     int test() {
-    //         auto first = 0;
-    //         auto second = 1;
-    //         auto pair = construct(first, second);
-    //         byte assign_first = 0;
-    //         long assign_second = 1;
-    //         pair.first = assign_first;
-    //         pair.second = assign_second;
-    //         return 0;
-    //     }
-    //     "
-    // ).for_function("test", |f| { f
-    //     .for_variable("first", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("byte");
-    //     })
-    //     .for_variable("second", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("long");
-    //     })
-    //     .for_variable("pair", |v| { v
-    //         .assert_parser_type("auto")
-    //         .assert_concrete_type("Pair");
-    //     });
-    // });
+    Tester::new_single_source_expect_ok(
+        "by field access",
+        "
+        struct Pair{ T1 first, T2 second }
+        Pair construct(T1 first, T2 second) {
+            return Pair{ first: first, second: second };
+        }
+        int test() {
+            auto first = 0;
+            auto second = 1;
+            auto pair = construct(first, second);
+            byte assign_first = 0;
+            long assign_second = 1;
+            pair.first = assign_first;
+            pair.second = assign_second;
+            return 0;
+        }
+        "
+    ).for_function("test", |f| { f
+        .for_variable("first", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("byte");
+        })
+        .for_variable("second", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("long");
+        })
+        .for_variable("pair", |v| { v
+            .assert_parser_type("auto")
+            .assert_concrete_type("Pair");
+        });
+    });
 
     Tester::new_single_source_expect_ok(
         "by nested field access",
@@ -152,7 +152,7 @@
     ).for_function("test", |f| { f
         .for_variable("thing", |v| { v
             .assert_parser_type("auto")
-            .assert_concrete_type("Pair>");
+            .assert_concrete_type("Node>");
         });
     });
 }
\ No newline at end of file
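
Appended note (not part of the patch): a minimal, self-contained Rust sketch of the resolution flow the type_resolver/type_table changes move towards, under simplified assumptions. `Definition`, `QueueElement`, `enqueue` and `check_monomorph` are hypothetical stand-ins, not the crate's real API; only the pattern is taken from the patch: annotate the AST with concrete types for the first monomorph of a polymorphic procedure, merely re-check later monomorphs, and never queue the same resolve element twice.

// Illustrative stand-ins only; the real types live in type_table.rs / type_resolver.rs.
#[derive(PartialEq, Eq)]
struct QueueElement {
    definition: usize,            // stand-in for (root_id, definition_id)
    monomorph_types: Vec<String>, // concrete types chosen for the polymorphic variables
}

struct Definition {
    monomorphs: Vec<Vec<String>>, // every monomorph type-checked so far
}

impl Definition {
    // Mirrors DefinedType::has_any_monomorph(): if true, an earlier monomorph
    // already wrote concrete types into the AST.
    fn has_any_monomorph(&self) -> bool {
        !self.monomorphs.is_empty()
    }
}

// Mirrors the `!queue.contains(&queue_element)` guard added by the patch:
// the same (definition, monomorph) pair is only resolved once.
fn enqueue(queue: &mut Vec<QueueElement>, element: QueueElement) {
    if !queue.contains(&element) {
        queue.push(element);
    }
}

// Mirrors the end of the type-resolving pass: annotate on the first
// monomorph, only verify on later ones, then record the monomorph.
fn check_monomorph(def: &mut Definition, types: Vec<String>) {
    if def.has_any_monomorph() {
        // AST already annotated; a debug build would only assert that the
        // freshly inferred types match the stored ones.
    } else {
        // First monomorph: write the inferred concrete types into the AST.
    }
    def.monomorphs.push(types); // add_monomorph()
}

fn main() {
    let mut def = Definition { monomorphs: Vec::new() };
    let mut queue = Vec::new();

    // Queueing the same element twice leaves a single entry.
    enqueue(&mut queue, QueueElement { definition: 0, monomorph_types: vec!["byte".into()] });
    enqueue(&mut queue, QueueElement { definition: 0, monomorph_types: vec!["byte".into()] });
    assert_eq!(queue.len(), 1);

    // First monomorph annotates, second only re-checks.
    check_monomorph(&mut def, vec!["byte".into()]);
    check_monomorph(&mut def, vec!["long".into()]);
    assert_eq!(def.monomorphs.len(), 2);
}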