Source code for networkx.algorithms.tree.mst

# -*- coding: utf-8 -*-
"""
Algorithms for calculating min/max spanning trees/forests.

"""
#    Copyright (C) 2015 NetworkX Developers
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    Loïc Séguin-C. <loicseguin@gmail.com>
#    All rights reserved.
#    BSD license.

__all__ = [
    'minimum_spanning_edges', 'maximum_spanning_edges',
    'minimum_spanning_tree', 'maximum_spanning_tree',
]

from heapq import heappop, heappush
from itertools import count

import networkx as nx
from networkx.utils import UnionFind, not_implemented_for


@not_implemented_for('multigraph')
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False, data=True):
    """Iterates over the edges of a minimum spanning tree as computed by
    Borůvka's algorithm.

    `G` is a NetworkX graph. Also, the edges must have distinct weights,
    otherwise the edges may not form a tree.

    `weight` is the edge attribute that stores the edge weights. Each
    edge in the graph must have such an attribute, otherwise a
    :exc:`KeyError` will be raised.

    If `data` is True, this iterator yields edges of the form
    ``(u, v, d)``, where ``u`` and ``v`` are nodes and ``d`` is the edge
    attribute dictionary. Otherwise, it yields edges of the form
    ``(u, v)``.

    The `keys` argument is ignored, since this function is not
    implemented for multigraphs; it exists only for consistency with the
    other minimum spanning tree functions.

    """
    opt = min if minimum else max
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # TODO In Python 3.4 and later, we can just do
        #
        #     boundary = nx.edge_boundary(G, component, data=weight)
        #     return opt(boundary, key=lambda e: e[-1][weight], default=None)
        #
        # which is better because it doesn't require creating a list.
        boundary = list(nx.edge_boundary(G, component, data=True))
        if not boundary:
            return None
        return opt(boundary, key=lambda e: e[-1][weight])

    # Determine the optimum edge in the edge boundary of each component
    # in the forest.
    best_edges = (best_edge(component) for component in forest.to_sets())
    best_edges = [edge for edge in best_edges if edge is not None]
    # If each entry was ``None``, that means the graph was disconnected,
    # so we are done generating the forest.
    while best_edges:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator. In this list, the
        # same edge may appear twice, in different orientations (but
        # that's okay, since a union operation will be called on the
        # endpoints the first time it is seen, but not the second time).
        #
        # Any ``None`` indicates that the edge boundary for that
        # component was empty, so that part of the forest has been
        # completed.
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum. (Same goes for the identical lines outside the loop.)
        best_edges = (best_edge(component) for component in forest.to_sets())
        best_edges = [edge for edge in best_edges if edge is not None]
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)


def kruskal_mst_edges(G, minimum, weight='weight', keys=True, data=True):
    subtrees = UnionFind()
    if G.is_multigraph():
        edges = G.edges(keys=True, data=True)
    else:
        edges = G.edges(data=True)
    getweight = lambda t: t[-1].get(weight, 1)
    edges = sorted(edges, key=getweight, reverse=not minimum)
    is_multigraph = G.is_multigraph()
    # Multigraphs need to handle edge keys in addition to edge data.
    if is_multigraph:
        for u, v, k, d in edges:
            if subtrees[u] != subtrees[v]:
                if keys:
                    if data:
                        yield (u, v, k, d)
                    else:
                        yield (u, v, k)
                else:
                    if data:
                        yield (u, v, d)
                    else:
                        yield (u, v)
                subtrees.union(u, v)
    else:
        for u, v, d in edges:
            if subtrees[u] != subtrees[v]:
                if data:
                    yield (u, v, d)
                else:
                    yield (u, v)
                subtrees.union(u, v)


def prim_mst_edges(G, minimum, weight='weight', keys=True, data=True):
    is_multigraph = G.is_multigraph()
    push = heappush
    pop = heappop

    nodes = list(G)
    c = count()

    sign = 1
    if not minimum:
        sign = -1

    while nodes:
        u = nodes.pop(0)
        frontier = []
        visited = [u]
        if is_multigraph:
            for u, v, k, d in G.edges(u, keys=True, data=True):
                push(frontier, (d.get(weight, 1) * sign, next(c), u, v, k))
        else:
            for u, v, d in G.edges(u, data=True):
                push(frontier, (d.get(weight, 1) * sign, next(c), u, v))
        while frontier:
            if is_multigraph:
                W, _, u, v, k = pop(frontier)
            else:
                W, _, u, v = pop(frontier)
            if v in visited:
                continue
            visited.append(v)
            nodes.remove(v)
            if is_multigraph:
                for _, w, k2, d2 in G.edges(v, keys=True, data=True):
                    if w in visited:
                        continue
                    new_weight = d2.get(weight, 1) * sign
                    push(frontier, (new_weight, next(c), v, w, k2))
            else:
                for _, w, d2 in G.edges(v, data=True):
                    if w in visited:
                        continue
                    new_weight = d2.get(weight, 1) * sign
                    push(frontier, (new_weight, next(c), v, w))
            # Multigraphs need to handle edge keys in addition to edge data.
            if is_multigraph and keys:
                if data:
                    yield u, v, k, G[u][v]
                else:
                    yield u, v, k
            else:
                if data:
                    yield u, v, G[u][v]
                else:
                    yield u, v

ALGORITHMS = {
    'boruvka': boruvka_mst_edges,
    u'borůvka': boruvka_mst_edges,
    'kruskal': kruskal_mst_edges,
    'prim': prim_mst_edges
}


@not_implemented_for('directed')
def _spanning_edges(G, minimum, algorithm='kruskal', weight='weight',
                    keys=True, data=True):
    try:
        algo = ALGORITHMS[algorithm]
    except KeyError:
        msg = '{} is not a valid choice for an algorithm.'.format(algorithm)
        raise ValueError(msg)

    return algo(G, minimum=minimum, weight=weight, keys=keys, data=data)


[docs]def minimum_spanning_edges(G, algorithm='kruskal', weight='weight', keys=True, data=True): """Generate edges in a minimum spanning forest of an undirected weighted graph. A minimum spanning tree is a subgraph of the graph (a tree) with the minimum sum of edge weights. A spanning forest is a union of the spanning trees for each connected component of the graph. Parameters ---------- G : undirected Graph An undirected graph. If `G` is connected, then the algorithm finds a spanning tree. Otherwise, a spanning forest is found. algorithm : string The algorithm to use when finding a minimum spanning tree. Valid choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. weight : string Edge data key to use for weight (default 'weight'). keys : bool Whether to yield edge key in multigraphs in addition to the edge. If `G` is not a multigraph, this is ignored. data : bool, optional If True yield the edge data along with the edge. Returns ------- edges : iterator An iterator over tuples representing edges in a minimum spanning tree of `G`. If `G` is a multigraph and both `keys` and `data` are True, then the tuples are four-tuples of the form `(u, v, k, w)`, where `(u, v)` is an edge, `k` is the edge key identifying the particular edge joining `u` with `v`, and `w` is the weight of the edge. If `keys` is True but `data` is False, the tuples are three-tuples of the form `(u, v, k)`. If `G` is not a multigraph, the tuples are of the form `(u, v, w)` if `data` is True or `(u, v)` if `data` is False. Examples -------- >>> from networkx.algorithms import tree Find minimum spanning edges by Kruskal's algorithm >>> G = nx.cycle_graph(4) >>> G.add_edge(0, 3, weight=2) >>> mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False) >>> edgelist = list(mst) >>> sorted(edgelist) [(0, 1), (1, 2), (2, 3)] Find minimum spanning edges by Prim's algorithm >>> G = nx.cycle_graph(4) >>> G.add_edge(0, 3, weight=2) >>> mst = tree.minimum_spanning_edges(G, algorithm='prim', data=False) >>> edgelist = list(mst) >>> sorted(edgelist) [(0, 1), (1, 2), (2, 3)] Notes ----- For Borůvka's algorithm, each edge must have a weight attribute, and each edge weight must be distinct. For the other algorithms, if the graph edges do not have a weight attribute a default weight of 1 will be used. Modified code from David Eppstein, April 2006 http://www.ics.uci.edu/~eppstein/PADS/ """ return _spanning_edges(G, minimum=True, algorithm=algorithm, weight=weight, keys=keys, data=data)
[docs]def maximum_spanning_edges(G, algorithm='kruskal', weight='weight', data=True): """Generate edges in a maximum spanning forest of an undirected weighted graph. A maximum spanning tree is a subgraph of the graph (a tree) with the maximum possible sum of edge weights. A spanning forest is a union of the spanning trees for each connected component of the graph. Parameters ---------- G : undirected Graph An undirected graph. If `G` is connected, then the algorithm finds a spanning tree. Otherwise, a spanning forest is found. algorithm : string The algorithm to use when finding a maximum spanning tree. Valid choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. weight : string Edge data key to use for weight (default 'weight'). keys : bool Whether to yield edge key in multigraphs in addition to the edge. If `G` is not a multigraph, this is ignored. data : bool, optional If True yield the edge data along with the edge. Returns ------- edges : iterator An iterator over tuples representing edges in a maximum spanning tree of `G`. If `G` is a multigraph and both `keys` and `data` are True, then the tuples are four-tuples of the form `(u, v, k, w)`, where `(u, v)` is an edge, `k` is the edge key identifying the particular edge joining `u` with `v`, and `w` is the weight of the edge. If `keys` is True but `data` is False, the tuples are three-tuples of the form `(u, v, k)`. If `G` is not a multigraph, the tuples are of the form `(u, v, w)` if `data` is True or `(u, v)` if `data` is False. Examples -------- >>> from networkx.algorithms import tree Find maximum spanning edges by Kruskal's algorithm >>> G = nx.cycle_graph(4) >>> G.add_edge(0, 3, weight=2) >>> mst = tree.maximum_spanning_edges(G, algorithm='kruskal', data=False) >>> edgelist = list(mst) >>> sorted(edgelist) [(0, 1), (0, 3), (1, 2)] Find maximum spanning edges by Prim's algorithm >>> G = nx.cycle_graph(4) >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3 >>> mst = tree.maximum_spanning_edges(G, algorithm='prim', data=False) >>> edgelist = list(mst) >>> sorted(edgelist) [(0, 1), (0, 3), (3, 2)] Notes ----- For Borůvka's algorithm, each edge must have a weight attribute, and each edge weight must be distinct. For the other algorithms, if the graph edges do not have a weight attribute a default weight of 1 will be used. Modified code from David Eppstein, April 2006 http://www.ics.uci.edu/~eppstein/PADS/ """ return _spanning_edges(G, minimum=False, algorithm=algorithm, weight=weight, data=data)
@not_implemented_for('directed') def _optimum_spanning_tree(G, algorithm, minimum, weight='weight'): # When creating the spanning tree, we can ignore the key used to # identify multigraph edges, since a tree is guaranteed to have no # multiedges. This is why we use `keys=False`. edges = _spanning_edges(G, minimum, algorithm=algorithm, weight=weight, keys=False, data=True) T = nx.Graph(edges) # Add isolated nodes if len(T) != len(G): T.add_nodes_from(nx.isolates(G)) # Add node and graph attributes as shallow copy for n in T: T.node[n] = G.node[n].copy() T.graph = G.graph.copy() return T
[docs]def minimum_spanning_tree(G, weight='weight', algorithm='kruskal'): """Returns a minimum spanning tree or forest on an undirected graph `G`. Parameters ---------- G : undirected graph An undirected graph. If `G` is connected, then the algorithm finds a spanning tree. Otherwise, a spanning forest is found. weight : str Data key to use for edge weights. algorithm : string The algorithm to use when finding a minimum spanning tree. Valid choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. Returns ------- G : NetworkX Graph A minimum spanning tree or forest. Examples -------- >>> G = nx.cycle_graph(4) >>> G.add_edge(0, 3, weight=2) >>> T = nx.minimum_spanning_tree(G) >>> sorted(T.edges(data=True)) [(0, 1, {}), (1, 2, {}), (2, 3, {})] Notes ----- For Borůvka's algorithm, each edge must have a weight attribute, and each edge weight must be distinct. For the other algorithms, if the graph edges do not have a weight attribute a default weight of 1 will be used. There may be more than one tree with the same minimum or maximum weight. See :mod:`networkx.tree.recognition` for more detailed definitions. """ return _optimum_spanning_tree(G, algorithm=algorithm, minimum=True, weight=weight)
[docs]def maximum_spanning_tree(G, weight='weight', algorithm='kruskal'): """Returns a maximum spanning tree or forest on an undirected graph `G`. Parameters ---------- G : undirected graph An undirected graph. If `G` is connected, then the algorithm finds a spanning tree. Otherwise, a spanning forest is found. weight : str Data key to use for edge weights. algorithm : string The algorithm to use when finding a minimum spanning tree. Valid choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'. Returns ------- G : NetworkX Graph A minimum spanning tree or forest. Examples -------- >>> G = nx.cycle_graph(4) >>> G.add_edge(0, 3, weight=2) >>> T = nx.maximum_spanning_tree(G) >>> sorted(T.edges(data=True)) [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})] Notes ----- For Borůvka's algorithm, each edge must have a weight attribute, and each edge weight must be distinct. For the other algorithms, if the graph edges do not have a weight attribute a default weight of 1 will be used. There may be more than one tree with the same minimum or maximum weight. See :mod:`networkx.tree.recognition` for more detailed definitions. """ return _optimum_spanning_tree(G, algorithm=algorithm, minimum=False, weight=weight)