I work on a solution to combine static code analysis metrics (maintanability index, cyclomatic complexity, lines of code) with test code coverage. Both metrics and code coverage are hierachical (Target -> Module -> Namespace -> Type -> Member), both have certain numeric values and thus can be represented by the trees. I needed to merge maintanablity tree with code coverage tree.

Trees have the following properties:

  • One root node (these are trees and not graphs)
  • Each node can have zero or many children (these are not binary trees)
  • Tree can have duplicate keys (unfortunately)

Given tree A and tree B, here is what specifically I wanted from merging:

  • Values of the nodes with the same keys are updated
  • Nodes in B that aren't in A should be added to A with all their children
  • Nodes in A that aren't in B should be identified
  • Only unique nodes should be merged
  • There should be a statistics on how many nodes were updated, nodes in tree A which are not in B and reverse.

I started by abstracting input metrics and code coverage into this base Node class:

public class Node  
{
    public virtual string Key { get; set; }
    public virtual int? Value { get; set; }
    public string Name { get; set; }
    public MergeStatus MergeStatus { get; set; }
    public virtual IList<Node> Children { get { return null; }}


    public Node()
    {
        MergeStatus = MergeStatus.Unknown;
    }

    public IList<Node> UniqueChildren
    {
        get
        {
            var uniqueKeys = Children
                .GroupBy(g => g.Key)
                .Where(w => w.Count() == 1)
                .ToList();

            return Children.Join(uniqueKeys, c => c.Key, u => u.Key, (c, u) => c).ToList();
        }
    }

    public void UpdateValue(int? value)
    {
        this.Value = this.Value.HasValue ? this.Value + value : value;
        MergeStatus = MergeStatus.Updated;
    }

    public virtual void AddChild(Node child)
    {
    }
}

MergeStatus is this enum:

public enum MergeStatus  
{
    Unknown,
    Updated,
    MissingInOtherTree,
    NewInThisTree
}

With that main merging methods is very simple:

private void MergeNode(Node nodeA, Node nodeB)  
{
    if (nodeA.MergeStatus != MergeStatus.Updated)
    {
        nodeA.UpdateValue(nodeB.Value);
    }

    // NodeA => NodeB
    foreach (var childNodeA in nodeA.UniqueChildren)
    {
        var childNodeB = nodeB.UniqueChildren.SingleOrDefault(s => s.Equals(childNodeA));
        if (childNodeA.Equals(childNodeB))
        {
            childNodeA.UpdateValue(childNodeB.Value);
        }
        else
        {
            childNodeA.MergeStatus = MergeStatus.MissingInOtherTree;
        }
    }

    // NodeB => NodeA
    foreach (var childNodeB in nodeB.UniqueChildren)
    {
        if (nodeA.UniqueChildren.SingleOrDefault(s => s.Equals(childNodeB)) == null)
        {
            nodeA.AddChild(childNodeB);
            childNodeB.MergeStatus = MergeStatus.NewInThisTree;
        }
    }

    // Keep on going down with the updated nodes
    foreach (var newNodeA in nodeA.UniqueChildren.Where(c => c.MergeStatus == MergeStatus.Updated && c.UniqueChildren.Any()))
    {
        var newNodeB = nodeB.UniqueChildren.Single(c => c.Equals(newNodeA));
        MergeNode(newNodeA, newNodeB);
    }
}

In order to support other operations on the node I also have Tree class

public class Tree  
{
    private readonly Node _root;

    public Node Root { get { return _root; } }

    public Tree(Node root)
    {
        if (root == null)
        {
            throw new ArgumentException("Must have root", "root");
        }
        _root = root;
    }

    public Node FindNode(string key)
    {
        if (key == null)
        {
            return null;
        }

        return Nodes.FirstOrDefault(n => n.Key == key);
    }

    public IEnumerable<Node> Nodes
    {
        get
        {
            return Traverse(_root);
        }
    }

    private IEnumerable<Node> Traverse(Node node)
    {
        yield return node;
        foreach (var child in node.Children)
        {
            var nodes = Traverse(child);
            foreach (var n in nodes)
            {
                yield return n;
            }
        }
    }
}

This allows me to enumerate all nodes in the tree to build stats:

public class MergeStats  
{
    public int UpdatedCount { get; set; }
    public int MissingInOtherTreeCount { get; set; }
    public int NewInThisTreeCount { get; set; }
    public int TotalNodesCount { get; set; }

    public IList<Node> NewInThisTreeNodes { get; private set; }
    public IList<Node> MissingInOtherTreeNodes { get; private set; }


    public MergeStats()
    {
        NewInThisTreeNodes = new List<Node>();
        MissingInOtherTreeNodes = new List<Node>();
    }

    public decimal MergeRatio
    {
        get
        {
            if (TotalNodesCount == 0)
            {
                return 0;
            }
            else
            {
                return (decimal) UpdatedCount/TotalNodesCount;
            }
        }
    }

    public void Calculate(Tree tree)
    {
        if (tree == null)
        {
            return;
        }

        foreach (var node in tree.Nodes)
        {
            TotalNodesCount++;
            switch (node.MergeStatus)
            {
                case MergeStatus.Updated:
                    UpdatedCount++;
                    break;
                case MergeStatus.NewInThisTree:
                    NewInThisTreeCount++;
                    NewInThisTreeNodes.Add(node);
                    break;
                case MergeStatus.MissingInOtherTree:
                    MissingInOtherTreeCount++;
                    MissingInOtherTreeNodes.Add(node);
                    break;
            }
        }
    }
}