Clique algorithms (taken from NetworkX)

This commit is contained in:
Dimitri Lozeve 2018-03-02 20:28:22 +00:00
parent e30f30b0f9
commit 26b7ce87a5
3 changed files with 269 additions and 70 deletions

1
.gitignore vendored
View file

@ -10,3 +10,4 @@ data/
*.synctex.gz
*/auto/
.ipynb_checkpoints/
__pycache__/

214
clique.py Normal file
View file

@ -0,0 +1,214 @@
import graph_tool.all as gt
def find_cliques(G):
    """Iterate over all maximal cliques of an undirected graph.

    A maximal clique is a complete subgraph that cannot be enlarged by
    adding another vertex. The largest maximal clique is sometimes
    called the *maximum clique*.

    This is the iterative (explicit stack) variant, so it does not
    depend on Python's recursion limit.

    Parameters
    ----------
    G : graph-tool graph
        An undirected graph. Adjacency is read through
        ``G.get_out_neighbors``, so a directed graph is presumably not
        handled correctly -- pass an undirected graph.

    Returns
    -------
    iterator
        An iterator over maximal cliques, each of which is a list of
        vertex indices as returned by ``G.get_vertices()``. The order of
        the cliques, and of the vertices inside a clique, is arbitrary.

    See Also
    --------
    find_cliques_recursive
        A recursive version of the same algorithm.

    Notes
    -----
    Adapted from NetworkX.
    https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py

    To obtain a list of all maximal cliques, use
    ``list(find_cliques(G))``. Be aware that in the worst case the
    length of this list can be exponential in the number of vertices.
    This function only keeps the current candidate sets in memory while
    searching; it never stores the cliques it has already produced.

    The implementation follows Bron and Kerbosch (1973) [1]_, as adapted
    by Tomita, Tanaka and Takahashi (2006) [2]_ and discussed in Cazals
    and Karande (2008) [3]_, with the recursion unrolled onto an
    explicit stack.

    Self-loops and parallel edges are ignored, since cliques are not
    conventionally defined with such edges.

    References
    ----------
    .. [1] Bron, C. and Kerbosch, J.
       "Algorithm 457: finding all cliques of an undirected graph".
       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
       <http://portal.acm.org/citation.cfm?doid=362342.362367>
    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
       "The worst-case time complexity for generating all maximal
       cliques and computational experiments",
       *Theoretical Computer Science*, Volume 363, Issue 1,
       Computing and Combinatorics,
       10th Annual International Conference on
       Computing and Combinatorics (COCOON 2004), 25 October 2006,
       Pages 28--42
       <https://doi.org/10.1016/j.tcs.2006.06.015>
    .. [3] F. Cazals, C. Karande,
       "A note on the problem of reporting maximal cliques",
       *Theoretical Computer Science*,
       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
       <https://doi.org/10.1016/j.tcs.2008.05.010>
    """
    vertices = G.get_vertices()
    # len() rather than truthiness: the vertex container may be an array
    # whose truth value is ambiguous.
    if len(vertices) == 0:
        return

    # Neighbor sets per vertex. Using a set collapses parallel edges, and
    # skipping v == u drops self-loops: a vertex listed in its own
    # neighborhood would stay in `subg & adj[q]` forever and prevent any
    # clique containing it from being reported.
    adj = {}
    for u in G.vertices():
        u_index = int(u)
        adj[u] = {v for v in G.get_out_neighbors(u) if v != u_index}

    # Q is the clique under construction; its last slot is the vertex
    # currently being tried at this depth.
    Q = [None]
    subg = set(vertices)
    cand = set(vertices)
    # Pivot: the vertex of subg with the most neighbors among candidates.
    # Only candidates NOT adjacent to the pivot need to be expanded.
    u = max(subg, key=lambda w: len(cand & adj[w]))
    ext_u = cand - adj[u]
    stack = []

    while True:
        if ext_u:
            q = ext_u.pop()
            cand.remove(q)
            Q[-1] = q
            adj_q = adj[q]
            subg_q = subg & adj_q
            if not subg_q:
                # Nothing can extend Q any more: it is maximal.
                yield Q[:]
            else:
                cand_q = cand & adj_q
                if cand_q:
                    # Descend one level, saving this level's state.
                    stack.append((subg, cand, ext_u))
                    Q.append(None)
                    subg = subg_q
                    cand = cand_q
                    u = max(subg, key=lambda w: len(cand & adj[w]))
                    ext_u = cand - adj[u]
        elif stack:
            # This level is exhausted: go back to the parent level.
            Q.pop()
            subg, cand, ext_u = stack.pop()
        else:
            # Top level exhausted: the search is complete.
            return
def find_cliques_recursive(G):
    """Iterate over all maximal cliques of an undirected graph (recursive).

    A maximal clique is a complete subgraph that cannot be enlarged by
    adding another vertex. The largest maximal clique is sometimes
    called the *maximum clique*.

    This is the recursive variant: the nesting depth grows with the size
    of the clique being built, so it may hit Python's recursion limit on
    graphs with very large cliques.

    Parameters
    ----------
    G : graph-tool graph
        An undirected graph. Adjacency is read through
        ``G.get_out_neighbors``, so a directed graph is presumably not
        handled correctly -- pass an undirected graph.

    Returns
    -------
    iterator
        An iterator over maximal cliques, each of which is a list of
        vertex indices as returned by ``G.get_vertices()``. The order of
        the cliques, and of the vertices inside a clique, is arbitrary.

    See Also
    --------
    find_cliques
        An iterative version of the same algorithm.

    Notes
    -----
    Adapted from NetworkX.
    https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py

    To obtain a list of all maximal cliques, use
    ``list(find_cliques_recursive(G))``. Be aware that in the worst case
    the length of this list can be exponential in the number of
    vertices. This function only keeps the current candidate sets in
    memory while searching; it never stores the cliques it has already
    produced.

    The implementation follows Bron and Kerbosch (1973) [1]_, as adapted
    by Tomita, Tanaka and Takahashi (2006) [2]_ and discussed in Cazals
    and Karande (2008) [3]_.

    Self-loops and parallel edges are ignored, since cliques are not
    conventionally defined with such edges.

    References
    ----------
    .. [1] Bron, C. and Kerbosch, J.
       "Algorithm 457: finding all cliques of an undirected graph".
       *Communications of the ACM* 16, 9 (Sep. 1973), 575--577.
       <http://portal.acm.org/citation.cfm?doid=362342.362367>
    .. [2] Etsuji Tomita, Akira Tanaka, Haruhisa Takahashi,
       "The worst-case time complexity for generating all maximal
       cliques and computational experiments",
       *Theoretical Computer Science*, Volume 363, Issue 1,
       Computing and Combinatorics,
       10th Annual International Conference on
       Computing and Combinatorics (COCOON 2004), 25 October 2006,
       Pages 28--42
       <https://doi.org/10.1016/j.tcs.2006.06.015>
    .. [3] F. Cazals, C. Karande,
       "A note on the problem of reporting maximal cliques",
       *Theoretical Computer Science*,
       Volume 407, Issues 1--3, 6 November 2008, Pages 564--568,
       <https://doi.org/10.1016/j.tcs.2008.05.010>
    """
    vertices = G.get_vertices()
    # len() rather than truthiness: the vertex container may be an array
    # whose truth value is ambiguous.
    if len(vertices) == 0:
        return iter([])

    # Neighbor sets per vertex. Using a set collapses parallel edges, and
    # skipping v == u drops self-loops: a vertex listed in its own
    # neighborhood would stay in `subg & adj[q]` forever and prevent any
    # clique containing it from being reported.
    adj = {}
    for u in G.vertices():
        u_index = int(u)
        adj[u] = {v for v in G.get_out_neighbors(u) if v != u_index}

    # Clique under construction, shared by every level of the recursion.
    Q = []

    def expand(subg, cand):
        # Pivot: the vertex of subg with the most neighbors among the
        # candidates. Only candidates NOT adjacent to it are expanded.
        u = max(subg, key=lambda w: len(cand & adj[w]))
        # `cand - adj[u]` is a fresh set, so removing from `cand` inside
        # the loop does not disturb the iteration.
        for q in cand - adj[u]:
            cand.remove(q)
            Q.append(q)
            adj_q = adj[q]
            subg_q = subg & adj_q
            if not subg_q:
                # Nothing can extend Q any more: it is maximal.
                yield Q[:]
            else:
                cand_q = cand & adj_q
                if cand_q:
                    yield from expand(subg_q, cand_q)
            Q.pop()

    return expand(set(vertices), set(vertices))
def cliques_containing_node(G, nodes=None, cliques=None):
    """Select the cliques that contain the given node(s).

    Parameters
    ----------
    G : graph-tool graph
        Only consulted when `nodes` or `cliques` is omitted.
    nodes : single node or list of nodes, optional
        Node(s) to look up. Defaults to every vertex of `G`.
    cliques : list of cliques, optional
        Precomputed cliques; when omitted they are computed with
        ``find_cliques(G)``.

    Returns
    -------
    list or dict
        If `nodes` is a ``list`` (or omitted), a dict mapping each node
        to the list of cliques containing it. Otherwise `nodes` is
        treated as a single node and the list of cliques containing it
        is returned.

    Notes
    -----
    Taken from NetworkX.
    https://github.com/networkx/networkx/blob/master/networkx/algorithms/clique.py
    """
    all_cliques = list(find_cliques(G)) if cliques is None else cliques
    # No nodes requested: report on the entire graph.
    targets = list(G.get_vertices()) if nodes is None else nodes

    if isinstance(targets, list):
        return {
            node: [clique for clique in all_cliques if node in clique]
            for node in targets
        }

    # Anything that is not a list is assumed to be a single node.
    return [clique for clique in all_cliques if targets in clique]
if __name__ == "__main__":
    # Small demo on the "karate" graph from graph-tool's dataset
    # collection: print every maximal clique, then those containing
    # vertex 1.
    g = gt.collection.data["karate"]
    cliques = list(find_cliques(g))
    print(cliques)
    # Hand over the cliques computed above so they are not enumerated a
    # second time inside cliques_containing_node.
    print(cliques_containing_node(g, 1, cliques=cliques))

View file

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -37,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -46,29 +46,14 @@
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"version: 2.26 (commit b89e6b4e, Thu Nov 9 14:55:43 2017 +0000)\n",
"gcc version: 7.2.0\n",
"compilation flags: -DNDEBUG -D_FORTIFY_SOURCE=2 -fopenmp -O3 -fvisibility=default -fvisibility-inlines-hidden -Wno-deprecated -Wall -Wextra -ftemplate-backtrace-limit=0 -march=x86-64 -mtune=generic -O2 -pipe -fstack-protector-strong -fno-plt -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now\n",
"install prefix: /usr\n",
"python dir: /usr/lib/python3.6/site-packages\n",
"graph filtering: True\n",
"openmp: True\n",
"uname: Linux asha 4.15.6-1-ARCH #1 SMP PREEMPT Sun Feb 25 12:53:23 UTC 2018 x86_64\n"
]
}
],
"outputs": [],
"source": [
"gt.show_config()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -85,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -101,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -110,16 +95,7 @@
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Graph object, directed, with 264347 vertices and 733846 edges at 0x7fde8465c2b0>\n",
"distance (edge) (type: int32_t)\n"
]
}
],
"outputs": [],
"source": [
"print(G)\n",
"G.list_properties()"
@ -127,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -136,18 +112,7 @@
"slide_type": "-"
}
},
"outputs": [
{
"data": {
"text/plain": [
"PropertyArray([ 803, 842, 2428, ..., 1158, 323, 368], dtype=int32)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"dist = G.ep.get(\"distance\")\n",
"dist.get_array()"
@ -155,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -164,16 +129,7 @@
"slide_type": "-"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.6327185812827214\n",
"464318\n"
]
}
],
"outputs": [],
"source": [
"filt = G.new_edge_property(\"bool\")\n",
"filt.a = dist.a > 800\n",
@ -184,7 +140,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {
"autoscroll": false,
"collapsed": false,
@ -193,18 +149,7 @@
"slide_type": "-"
}
},
"outputs": [
{
"data": {
"text/plain": [
"PropertyArray([ 1, 2, 3, ..., 34918, 36421, 36946], dtype=int32)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"ordered_dist = dist.get_array()\n",
"ordered_dist = np.unique(np.sort(ordered_dist))\n",
@ -223,6 +168,45 @@
}
},
"outputs": [],
"source": [
"import clique as cl"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"condmat = gt.collection.data[\"cond-mat-2005\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clique_sizes = []\n",
"for c in cl.find_cliques(condmat):\n",
" clique_sizes.append(len(c))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots()\n",
"ax.hist(clique_sizes, bins=100);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],