From 26b7ce87a5e2fcf1ac863cb4f1857e6e92dd7afa Mon Sep 17 00:00:00 2001 From: Dimitri Lozeve Date: Fri, 2 Mar 2018 20:28:22 +0000 Subject: [PATCH] Clique algorithms (taken from NetworkX) --- .gitignore | 3 +- clique.py | 214 ++++++++++++++++++++++++++++++++++++++++++++++++ usa_roads.ipynb | 122 ++++++++++++--------------- 3 files changed, 269 insertions(+), 70 deletions(-) create mode 100644 clique.py diff --git a/.gitignore b/.gitignore index 0be9eca..31bd58a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ data/ *.run.xml *.synctex.gz */auto/ -.ipynb_checkpoints/ \ No newline at end of file +.ipynb_checkpoints/ +__pycache__/ diff --git a/clique.py b/clique.py new file mode 100644 index 0000000..ca97877 --- /dev/null +++ b/clique.py @@ -0,0 +1,214 @@ +import graph_tool.all as gt + + +def find_cliques(G): + """Returns all maximal cliques in an undirected graph. + For each node *v*, a *maximal clique for v* is a largest complete + subgraph containing *v*. The largest maximal clique is sometimes + called the *maximum clique*. + This function returns an iterator over cliques, each of which is a + list of nodes. It is an iterative implementation, so should not + suffer from recursion depth issues. + Parameters + ---------- + G : graph-tool graph + An undirected graph. + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. The order of cliques is arbitrary. + See Also + -------- + find_cliques_recursive + A recursive version of the same algorithm. + Notes + ----- + Taken from NetworkX. + https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py + + + To obtain a list of all maximal cliques, use + `list(find_cliques(G))`. However, be aware that in the worst-case, + the length of this list can be exponential in the number of nodes in + the graph (for example, when the graph is the complete graph). This + function avoids storing all cliques in memory by only keeping + current candidate node lists in memory during its search. + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. It + essentially unrolls the recursion used in the references to avoid + issues of recursion stack depth (for a recursive implementation, see + :func:`find_cliques_recursive`). + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + + """ + if len(G.get_vertices()) == 0: + return + + adj = {u: {v for v in G.get_out_neighbors(u)} for u in G.vertices()} + Q = [None] + + subg = set(G.get_vertices()) + cand = set(G.get_vertices()) + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + stack = [] + + try: + while True: + if ext_u: + q = ext_u.pop() + cand.remove(q) + Q[-1] = q + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + stack.append((subg, cand, ext_u)) + Q.append(None) + subg = subg_q + cand = cand_q + u = max(subg, key=lambda u: len(cand & adj[u])) + ext_u = cand - adj[u] + else: + Q.pop() + subg, cand, ext_u = stack.pop() + except IndexError: + pass + + +def find_cliques_recursive(G): + """Returns all maximal cliques in a graph. + For each node *v*, a *maximal clique for v* is a largest complete + subgraph containing *v*. The largest maximal clique is sometimes + called the *maximum clique*. + This function returns an iterator over cliques, each of which is a + list of nodes. It is a recursive implementation, so may suffer from + recursion depth issues. + Parameters + ---------- + G : graph-tool graph + Returns + ------- + iterator + An iterator over maximal cliques, each of which is a list of + nodes in `G`. The order of cliques is arbitrary. + See Also + -------- + find_cliques + An iterative version of the same algorithm. + Notes + ----- + Taken from NetworkX. + https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py + + To obtain a list of all maximal cliques, use + `list(find_cliques_recursive(G))`. However, be aware that in the + worst-case, the length of this list can be exponential in the number + of nodes in the graph (for example, when the graph is the complete + graph). This function avoids storing all cliques in memory by only + keeping current candidate node lists in memory during its search. + This implementation is based on the algorithm published by Bron and + Kerbosch (1973) [1]_, as adapted by Tomita, Tanaka and Takahashi + (2006) [2]_ and discussed in Cazals and Karande (2008) [3]_. For a + non-recursive implementation, see :func:`find_cliques`. + This algorithm ignores self-loops and parallel edges, since cliques + are not conventionally defined with such edges. + References + ---------- + .. [1] Bron, C. and Kerbosch, J. + "Algorithm 457: finding all cliques of an undirected graph". + *Communications of the ACM* 16, 9 (Sep. 1973), 575--577. + + .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi, + "The worst-case time complexity for generating all maximal + cliques and computational experiments", + *Theoretical Computer Science*, Volume 363, Issue 1, + Computing and Combinatorics, + 10th Annual International Conference on + Computing and Combinatorics (COCOON 2004), 25 October 2006, Pages 28--42 + + .. [3] F. Cazals, C. Karande, + "A note on the problem of reporting maximal cliques", + *Theoretical Computer Science*, + Volume 407, Issues 1--3, 6 November 2008, Pages 564--568, + + """ + if len(G.get_vertices()) == 0: + return iter([]) + + adj = {u: {v for v in G.get_out_neighbors(u)} for u in G.vertices()} + Q = [] + + def expand(subg, cand): + u = max(subg, key=lambda u: len(cand & adj[u])) + for q in cand - adj[u]: + cand.remove(q) + Q.append(q) + adj_q = adj[q] + subg_q = subg & adj_q + if not subg_q: + yield Q[:] + else: + cand_q = cand & adj_q + if cand_q: + for clique in expand(subg_q, cand_q): + yield clique + Q.pop() + + return expand(set(G.get_vertices()), set(G.get_vertices())) + + +def cliques_containing_node(G, nodes=None, cliques=None): + """Returns a list of cliques containing the given node. + Returns a single list or list of lists depending on input nodes. + Optional list of cliques can be input if already computed. + + Taken from NetworkX. + https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py + """ + if cliques is None: + cliques = list(find_cliques(G)) + + if nodes is None: + nodes = list(G.get_vertices()) # none, get entire graph + + if not isinstance(nodes, list): # check for a list + v = nodes + # assume it is a single value + vcliques = [c for c in cliques if v in c] + else: + vcliques = {} + for v in nodes: + vcliques[v] = [c for c in cliques if v in c] + return vcliques + + +if __name__ == "__main__": + g = gt.collection.data["karate"] + cliques = list(find_cliques(g)) + print(cliques) + print(cliques_containing_node(g, 1)) diff --git a/usa_roads.ipynb b/usa_roads.ipynb index a22b17d..b415105 100644 --- a/usa_roads.ipynb +++ b/usa_roads.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -46,29 +46,14 @@ "slide_type": "-" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "version: 2.26 (commit b89e6b4e, Thu Nov 9 14:55:43 2017 +0000)\n", - "gcc version: 7.2.0\n", - "compilation flags: -DNDEBUG -D_FORTIFY_SOURCE=2 -fopenmp -O3 -fvisibility=default -fvisibility-inlines-hidden -Wno-deprecated -Wall -Wextra -ftemplate-backtrace-limit=0 -march=x86-64 -mtune=generic -O2 -pipe -fstack-protector-strong -fno-plt -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now\n", - "install prefix: /usr\n", - "python dir: /usr/lib/python3.6/site-packages\n", - "graph filtering: True\n", - "openmp: True\n", - "uname: Linux asha 4.15.6-1-ARCH #1 SMP PREEMPT Sun Feb 25 12:53:23 UTC 2018 x86_64\n" - ] - } - ], + "outputs": [], "source": [ "gt.show_config()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -85,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -101,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -110,16 +95,7 @@ "slide_type": "-" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "distance (edge) (type: int32_t)\n" - ] - } - ], + "outputs": [], "source": [ "print(G)\n", "G.list_properties()" @@ -127,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -136,18 +112,7 @@ "slide_type": "-" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "PropertyArray([ 803, 842, 2428, ..., 1158, 323, 368], dtype=int32)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "dist = G.ep.get(\"distance\")\n", "dist.get_array()" @@ -155,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -164,16 +129,7 @@ "slide_type": "-" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.6327185812827214\n", - "464318\n" - ] - } - ], + "outputs": [], "source": [ "filt = G.new_edge_property(\"bool\")\n", "filt.a = dist.a > 800\n", @@ -184,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "autoscroll": false, "collapsed": false, @@ -193,18 +149,7 @@ "slide_type": "-" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "PropertyArray([ 1, 2, 3, ..., 34918, 36421, 36946], dtype=int32)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ordered_dist = dist.get_array()\n", "ordered_dist = np.unique(np.sort(ordered_dist))\n", @@ -223,6 +168,45 @@ } }, "outputs": [], + "source": [ + "import clique as cl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "condmat = gt.collection.data[\"cond-mat-2005\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clique_sizes = []\n", + "for c in cl.find_cliques(condmat):\n", + " clique_sizes.append(len(c))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots()\n", + "ax.hist(clique_sizes, bins=100);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] } ],