tda-networks/sociopatterns.ipynb
2018-09-10 10:30:45 +01:00

650 lines
35 KiB
Text

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SocioPatterns"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"autoscroll": false,
"collapsed": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"autoscroll": false,
"collapsed": false,
"ein.tags": "worksheet-0",
"slideshow": {
"slide_type": "-"
}
},
"outputs": [],
"source": [
"import igraph as ig\n",
"import dionysus as d"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"plt.style.use(\"fivethirtyeight\")\n",
"plt.rcParams[\"figure.figsize\"] = 10, 6"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.0.7.dev0'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data import"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"g = ig.read(\"data/sociopatterns/infectious/infectious.graphml\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'IGRAPH UN-- 10972 415912 -- \\n+ attr: id (v), name (v), id (e), time (e)'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.summary()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"80.0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(max(g.es[\"time\"]) - min(g.es[\"time\"])) // (3600 * 24)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Temporal partitioning"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Explicit imports instead of a wildcard: these are the only names from the\n",
"# local `zigzag` module that the cells below reference.\n",
"from zigzag import presence_times, sliding_windows, zigzag_network"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"wins = sliding_windows(g, 0.05)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(wins)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Zigzag persistence"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for w in wins[0:1]:\n",
" (f, t) = presence_times(w)\n",
" zz, dgms, cells = d.zigzag_homology_persistence(f, t)\n",
" d.plot.plot_diagram(dgms[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d.plot.plot_diagram(dgms[1])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"import multiprocessing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Map `zigzag_network` over every window using one worker per CPU.\n",
"# The context manager terminates the worker processes on exit, even when\n",
"# `pool.map` raises, so no workers are leaked on failure.\n",
"with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:\n",
"    dgms = pool.map(zigzag_network, wins)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pool.terminate()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sliced Wasserstein kernel"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Keep the first diagram (index 0) of every window whose result is non-empty.\n",
"# A length test is used so the filter matches the one applied for `dgms1` and\n",
"# does not depend on the result being exactly a `list`.\n",
"# NOTE(review): index 0 is presumably homology dimension 0 -- confirm against\n",
"# `zigzag_network`.\n",
"dgms0 = [dgm[0] for dgm in dgms if len(dgm) > 0]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"dgms1 = [dgm[1] for dgm in dgms if len(dgm) > 1]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from sliced_wasserstein import diagram_array, SW_approx"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 168498.26575157, 142213.96446884, ...,\n",
" 181004.87138341, 980779.00902599, 604002.56924413],\n",
" [ 168498.26575157, 0. , 108974.83210031, ...,\n",
" 292878.72367602, 864985.91166401, 466984.82235283],\n",
" [ 142213.96446884, 108974.83210031, 0. , ...,\n",
" 223518.93608983, 951003.34397253, 553228.56514372],\n",
" ...,\n",
" [ 181004.87138341, 292878.72367602, 223518.93608983, ...,\n",
" 0. , 1153158.29742231, 755045.92126984],\n",
" [ 980779.00902599, 864985.91166401, 951003.34397253, ...,\n",
" 1153158.29742231, 0. , 607164.48834291],\n",
" [ 604002.56924413, 466984.82235283, 553228.56514372, ...,\n",
" 755045.92126984, 607164.48834291, 0. ]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gram0 = np.array([[SW_approx(dgms0[i], dgms0[j], 10) for i in range(len(dgms0))] for j in range(len(dgms0))])\n",
"gram0"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 1782.59876486, 1976.144841 , ..., 1674.27089231,\n",
" 4008.08473571, 3074.53612959],\n",
" [1782.59876486, 0. , 814.44479983, ..., 1389.37805531,\n",
" 3609.13022428, 1694.74694713],\n",
" [1976.144841 , 814.44479983, 0. , ..., 1665.50397679,\n",
" 4034.59792213, 2097.51910373],\n",
" ...,\n",
" [1674.27089231, 1389.37805531, 1665.50397679, ..., 0. ,\n",
" 2953.12379337, 1952.36875879],\n",
" [4008.08473571, 3609.13022428, 4034.59792213, ..., 2953.12379337,\n",
" 0. , 3326.13065883],\n",
" [3074.53612959, 1694.74694713, 2097.51910373, ..., 1952.36875879,\n",
" 3326.13065883, 0. ]])"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gram1 = np.array([[SW_approx(dgms1[i], dgms1[j], 10) for i in range(len(dgms1))] for j in range(len(dgms1))])\n",
"gram1"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def dgm2array(dgm):\n",
"    \"\"\"Convert a persistence diagram into an array of shape (n_points, 2).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dgm : iterable\n",
"        Points exposing ``birth`` and ``death`` attributes.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        One ``[birth, death]`` row per point. An empty diagram yields an\n",
"        array of shape ``(0, 2)`` rather than ``(0,)``.\n",
"    \"\"\"\n",
"    # reshape(-1, 2) is a no-op for non-empty input and fixes the empty case.\n",
"    return np.array([[p.birth, p.death] for p in dgm]).reshape(-1, 2)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_tda import SlicedWasserstein"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Bottleneck distance"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import Client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Submit one row of pairwise bottleneck distances per diagram in `dgms0`;\n",
"# `distmat` holds lists of futures until they are gathered in the next cell.\n",
"client = Client()\n",
"distmat = []\n",
"for dgm in dgms0:\n",
"    # Bind the current diagram as a default argument so every submitted task is\n",
"    # tied to its own row instead of looking up the loop variable `dgm` later.\n",
"    distmat.append(client.map(lambda x, dgm=dgm: d.bottleneck_distance(x, dgm), dgms0))"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 37.67822266, 71.04492188, ..., inf,\n",
" inf, 19.10028076],\n",
" [37.69311523, 0. , 70.44287109, ..., inf,\n",
" inf, 37.14916992],\n",
" [70.62719727, 70.30273438, 0. , ..., inf,\n",
" inf, 69.04138184],\n",
" ...,\n",
" [ inf, inf, inf, ..., 0. ,\n",
" inf, inf],\n",
" [ inf, inf, inf, ..., inf,\n",
" 0. , inf],\n",
" [19.10028076, 37.14916992, 69.04138184, ..., inf,\n",
" inf, 0. ]])"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distmat = np.array(client.gather(distmat))\n",
"distmat"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clustering"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/lozeve/.local/share/virtualenvs/tda-networks--KypeAmE/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
" return f(*args, **kwds)\n",
"/home/lozeve/.local/share/virtualenvs/tda-networks--KypeAmE/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
" return f(*args, **kwds)\n",
"/home/lozeve/.local/share/virtualenvs/tda-networks--KypeAmE/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
" return f(*args, **kwds)\n",
"/home/lozeve/.local/share/virtualenvs/tda-networks--KypeAmE/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
" return f(*args, **kwds)\n"
]
}
],
"source": [
"from sklearn.svm import OneClassSVM"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"clf = OneClassSVM(kernel='precomputed')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): this cell previously predicted on an undefined `gram` with an\n",
"# estimator that was never fitted, so it failed on a fresh kernel. `gram0` is\n",
"# assumed to be the intended matrix -- confirm.\n",
"# TODO confirm: `gram0` holds pairwise SW_approx values with a zero diagonal; a\n",
"# precomputed kernel usually expects a similarity, e.g.\n",
"# np.exp(-gram0 / (2 * sigma**2)).\n",
"clf.fit(gram0)\n",
"# Map the {-1, +1} predictions to {0, 1}.\n",
"(clf.predict(gram0) + 1) // 2"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import AgglomerativeClustering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Replace every entry of `gram1` by its reciprocal. Zero entries (the diagonal)\n",
"# give 1/0 = inf, so the division warning is silenced and those entries are\n",
"# reset to 0 afterwards.\n",
"# NOTE(review): not idempotent -- re-running this cell inverts `gram1` again.\n",
"# NOTE(review): scikit-learn documents affinity=\"precomputed\" as expecting a\n",
"# distance matrix; confirm that clustering on reciprocal values is intended.\n",
"with np.errstate(divide=\"ignore\"):\n",
"    gram1 = 1 / gram1\n",
"gram1[gram1 == np.inf] = 0"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"clf = AgglomerativeClustering(n_clusters=2, affinity=\"precomputed\", linkage=\"complete\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AgglomerativeClustering(affinity='precomputed', compute_full_tree='auto',\n",
" connectivity=None, linkage='complete', memory=None,\n",
" n_clusters=2, pooling_func=<function mean at 0x7f3dd41c29d8>)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf.fit(gram1)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,\n",
" 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,\n",
" 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf.fit(gram1)\n",
"clf.labels_"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"distmat[distmat==np.inf] = 1e100"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1,\n",
" 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,\n",
" 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0,\n",
" 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1])"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf.fit(distmat)\n",
"clf.labels_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}