Continuing the subject of software dependencies graph, now that we have all modules (dll's and exe's) collected, how do we build the graph and find what we need in it?

Borrowing from my favorite algorithm book the core of it is directed graph where modules represented by integer vertices and dependencies by edges:

  
  
public class Digraph  
{
    public int V { get; private set; }
    public int E { get; private set; }

    private HashSet[] _adj;

    const int _initalCapacity = 100;

    public Digraph() : this(null)
    {
    }

    public void AddVertex(int v)
    {
        AddVertexInternal(v);
    }

    public void AddEdge(int v, int w)
    {
        if (v < 0)
        {
            throw new ArgumentException(nameof(v));
        }
        if (w < 0)
        {
            throw new ArgumentException(nameof(w));
        }
        AddVertexInternal(w);
        var edges = AddVertexInternal(v);
        edges.Add(w);
        E++;
    }

    public IEnumerable GetAdjList(int v)
    {
        if ((v >= V || v < 0))
        {
            throw new ArgumentException(nameof(v));
        }

        if (_adj[v] != null)
        {
            return _adj[v].AsEnumerable();
        }
        else
        {
            return null;
        }
    }

    public Digraph Reverse()
    {
        Digraph r = new Digraph();
        for (int v = 0; v < V; v++)
        {
            foreach (int w in GetAdjList(v))
            {
                r.AddEdge(w, v);
            }
        }
        return r;
    }

    private HashSet AddVertexInternal(int v)
    {
        ResizeArrayIfNeeded(v);
        HashSet edges;
        if (_adj[v] == null)
        {
            edges = new HashSet();
            _adj[v] = edges;
            if (v >= V)
            {
                V = v + 1;
            }
        }
        else
        {
            edges = _adj[v];
        }

        return edges;
    }

    private void ResizeArrayIfNeeded(int v)
    {
        while (v >= _adj.Count())
        {
            Array.Resize>(ref _adj, _adj.Count() * 2);
        }
    }
}
  

This is pretty much stock digraph with a few twists. One is that I use fixed size array of HashSet which is collection class like a bag where stuff can be added, duplicates will be ignored and order doesn't matter which is what we need for graph adjacency list. I like having a fixed size array as opposed to List because of it small footprint. Resizing an array is just one line of code. List is also backed by fixed size array, by the way.

Since module has other attributes such as name, version and description and digraph works off an integer vertex number, there's another class that does the translation between the two:

  
  
public class DependencyGraph  
{
    private readonly Dictionary _st;
    private readonly Dictionary _keys;
    private readonly Digraph _g;

    public IEnumerable Modules
    {
        get
        {
            return _st.Keys.AsEnumerable();
        }
    }

    public Digraph Digraph { get { return _g; } }

    public DependencyGraph() : this(null, new Digraph())
    {
    }

    public void AddModule(Module module)
    {
        int vertexIndex = AddVertexInternal(module);
        _g.AddVertex(vertexIndex);
    }

    public void AddDependency(Module fromModule, Module toModule)
    {
        int vi = AddVertexInternal(fromModule);
        int wi = AddVertexInternal(toModule);
        _g.AddEdge(vi, wi);
    }


    private int AddVertexInternal(Module v)
    {
        int vertexIndex;
        if (!_st.ContainsKey(v))
        {
            vertexIndex = AddKey(v);
        }
        else
        {
            vertexIndex = _st[v];
        }
        return vertexIndex;
    }

    private int AddKey(Module v)
    {
        int vertexIndex = _st.Count;
        v.Id = vertexIndex;
        _st.Add(v, vertexIndex);
        _keys.Add(vertexIndex, v);
        return vertexIndex;
    }
}
  

Here we have a symbol table in the form of Dictionary. As soon as a new module gets added, its index is added to the symbol table (lines 52-56). That index is what's used in the digraph. On the opposite site, when vertices are retrieved from the digraph, we need to go from vertex number (index) to module and for that there is inverted index for which the inserts are made at the same time.

So, given all that, how do we actually search? Let's consider this digraph (from the same book source, by the way):

In this example, if we are interested in knowing the dependencies, say vertex 0, we would like to see it's closest dependencies first followed by their dependencies, that is 5, 1, 6, 4, 9, 11, 10, 12 and for 2 we need 0, 3, 5, 1, 6, 4, 9, 11, 10, 12. What that means we need breadth-first search where we capture preorder:

public class DigraphBfs  
{
    public IEnumerable<int> Preorder { get { return _preorder; } }

    private readonly bool[] _marked;
    private readonly int _s;
    private readonly Queue<int> _preorder;

    public DigraphBfs(Digraph g, int s)
    {
        _s = s;
        _marked = new bool[g.V];
        _preorder = new Queue<int>();
        Bfs(g, s);
    }

    private void Bfs(Digraph g, int s)
    {
        var q = new Queue<int>();
        _marked[s] = true;
        q.Enqueue(s);
        while (q.Any())
        {
            int v = q.Dequeue();
            foreach (int w in g.GetAdjList(v))
            {
                if (!_marked[w])
                {
                    _marked[w] = true;
                    q.Enqueue(w);
                    _preorder.Enqueue(w);
                }
            }
        }
    }
}

That is essentially all that is required to manage and search for dependencies. What's left is graph persistence, once dependency graph is built we want to store it in permanent storage.