Initial commit

This commit is contained in:
Dimitri Lozeve 2018-03-02 17:50:54 +00:00
commit e30f30b0f9
8 changed files with 1115 additions and 0 deletions

86
proposal/preamble.tex Normal file
View file

@ -0,0 +1,86 @@
%% Shared preamble for the MSc project proposal; proposal.tex loads it
%% with \input{preamble}.
%% Fonts: fontspec needs a Unicode engine (XeLaTeX or LuaLaTeX).
%% Old-style figures for the serif and sans-serif fonts; the monospace
%% font is scaled down to 0.83.
\usepackage{fontspec}
\setmainfont[Numbers=OldStyle]{Linux Libertine O}
\setsansfont[Numbers=OldStyle]{Linux Biolinum O}
\setmonofont[Scale=0.83]{Inconsolata}
%% Language
\usepackage{polyglossia}
\setdefaultlanguage{english}
%% General-purpose packages (dummy text, graphics, colours, floats,
%% drop caps, maths, PDF inclusion, microtypography)
\usepackage{lipsum}
\usepackage{graphicx}
\usepackage[dvipsnames]{xcolor}
\usepackage{wrapfig}
\usepackage{subcaption}
\usepackage{lettrine}
\usepackage{amssymb, amsmath}
\usepackage{pdfpages}
\usepackage{microtype}
%% PDF document properties
\usepackage[unicode,colorlinks=true]{hyperref}
\hypersetup{
pdfauthor={Dimitri Lozeve},
pdftitle={Topological Data Analysis of time-dependent networks},
pdfsubject={MSc project proposal},
pdfkeywords={tda,network,project,msc},
pdfpagemode=UseOutlines,
% pdfpagelayout=TwoColumnRight,
linkcolor=MidnightBlue,
filecolor=MidnightBlue,
urlcolor=MidnightBlue,
citecolor=Green
}
%% Warning: everything below relies on commands of the memoir class
%% Margins: spine 2.5cm, fore-edge 3cm; \checkandfixthelayout applies
%% the new layout
\setlrmarginsandblock{2.5cm}{3cm}{*}
%\setulmarginsandblock{4cm}{4cm}{*}
\checkandfixthelayout%
%% Numbering of the logical divisions (down to subsections)
\setsecnumdepth{subsection}
\maxsecnumdepth{subsection}
%% Depth of the table of contents
\settocdepth{subsection}
\maxtocdepth{subsection}
%% Style of the headings of the logical divisions
\setsecheadstyle{\Large\scshape}
\setsubsecheadstyle{\large\scshape}
%% Abstract: listed in the table of contents, with custom fonts
\abstractintoc%
\renewcommand{\abstractnamefont}{\normalfont\large\scshape}
\renewcommand{\abstracttextfont}{\normalfont\normalsize}
%% Epigraphs
\setlength{\epigraphwidth}{0.5\textwidth}
\epigraphtextposition{flushleftright}
%% Colours
%\definecolor{purpletouch}{RGB}{103,30,117}
\definecolor{bleux}{RGB}{0,62,92}
%% Title-page metadata used by \maketitle
\author{Dimitri Lozeve}
\date{February 15, 2018}
\title{MSc project proposal\\
\Large Topological Data Analysis of time-dependent networks}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "proposal"
%%% End:

195
proposal/proposal.bib Normal file
View file

@ -0,0 +1,195 @@
% Oudot (2015), Persistence theory -- AMS Mathematical Surveys and Monographs, number 209 in the series.
@book{oudot_persistence_2015,
  author     = {Oudot, Steve Y.},
  title      = {Persistence theory: from quiver representations to data analysis},
  shorttitle = {Persistence theory},
  series     = {Mathematical surveys and monographs},
  number     = {209},
  publisher  = {American Mathematical Society},
  location   = {Providence, Rhode Island},
  date       = {2015},
  pagetotal  = {218},
  isbn       = {978-1-4704-2545-6},
  keywords   = {Algebraic topology, Algebraic topology -- Applied homological algebra and category theory -- Simplicial sets and complexes, Associative rings and algebras -- Representation theory of rings and algebras -- Representations of quivers and partially ordered sets, Computer science -- Computing methodologies and applications -- Computer graphics; computational geometry, Homology theory, Statistics -- Data analysis},
  file       = {Steve_Oudot_Persistence_Theory.pdf:/home/dimitri/Zotero/storage/ALZW577G/Steve_Oudot_Persistence_Theory.pdf:application/pdf}
}
% Carlsson (2009), Topology and data -- Bulletin of the AMS 46(2).
@article{carlsson_topology_2009,
  author       = {Carlsson, Gunnar},
  title        = {Topology and data},
  journaltitle = {Bulletin of the American Mathematical Society},
  volume       = {46},
  number       = {2},
  pages        = {255--308},
  date         = {2009-01-29},
  issn         = {0273-0979},
  doi          = {10.1090/S0273-0979-09-01249-X},
  url          = {http://www.ams.org/journal-getitem?pii=S0273-0979-09-01249-X},
  urldate      = {2017-11-03},
  langid       = {english},
  file         = {carlsson2009.pdf:/home/dimitri/Zotero/storage/WYT52FA5/carlsson2009.pdf:application/pdf}
}
% Chazal and Michel (2017), arXiv preprint 1710.04019.
% The arXiv identifier is stored in the eprint fields rather than in a fake journal title.
@online{chazal_introduction_2017,
  author     = {Chazal, Frédéric and Michel, Bertrand},
  title      = {An introduction to Topological Data Analysis: fundamental and practical aspects for data scientists},
  shorttitle = {An introduction to Topological Data Analysis},
  date       = {2017},
  eprinttype = {arxiv},
  eprint     = {1710.04019},
  file       = {chazal2017.pdf:/home/dimitri/Zotero/storage/CH8YWVM3/chazal2017.pdf:application/pdf}
}
% Edelsbrunner and Harer (2010), Computational topology: an introduction -- American Mathematical Society.
@book{edelsbrunner_computational_2010,
  author     = {Edelsbrunner, Herbert and Harer, John L.},
  title      = {Computational topology: an introduction},
  shorttitle = {Computational topology},
  publisher  = {American Mathematical Society},
  location   = {Providence, Rhode Island},
  date       = {2010},
  pagetotal  = {241},
  isbn       = {978-0-8218-4925-5},
  note       = {{OCLC}: ocn427757156},
  keywords   = {Algorithms, Computational complexity, Data processing, Geometry, Topology},
  file       = {Herbert Edelsbrunner, John L. Harer-Computational Topology_ An Introduction-American Mathematical Society (2009).pdf:/home/dimitri/Zotero/storage/FWGR5NJ3/Herbert Edelsbrunner, John L. Harer-Computational Topology_ An Introduction-American Mathematical Society (2009).pdf:application/pdf}
}
% Stolz, Harrington and Porter (2017), persistent homology of functional networks -- Chaos 27(4).
@article{stolz_persistent_2017,
  author       = {Stolz, Bernadette J. and Harrington, Heather A. and Porter, Mason A.},
  title        = {Persistent homology of time-dependent functional networks constructed from coupled time series},
  journaltitle = {Chaos: An Interdisciplinary Journal of Nonlinear Science},
  shortjournal = {Chaos},
  volume       = {27},
  number       = {4},
  pages        = {047410},
  date         = {2017-04-01},
  issn         = {1054-1500},
  doi          = {10.1063/1.4978997},
  url          = {http://aip.scitation.org/doi/full/10.1063/1.4978997},
  urldate      = {2018-01-18},
  abstract     = {We use topological data analysis to study “functional networks” that we construct from time-series data from both experimental and synthetic sources. We use persistent homology with a weight rank clique filtration to gain insights into these functional networks, and we use persistence landscapes to interpret our results. Our first example uses time-series output from networks of coupled Kuramoto oscillators. Our second example consists of biological data in the form of functional magnetic resonance imaging data that were acquired from human subjects during a simple motor-learning task in which subjects were monitored for three days during a five-day period. With these examples, we demonstrate that (1) using persistent homology to study functional networks provides fascinating insights into their properties and (2) the position of the features in a filtration can sometimes play a more vital role than persistence in the interpretation of topological features, even though conventionally the latter is used to distinguish between signal and noise. We find that persistent homology can detect differences in synchronization patterns in our data sets over time, giving insight both on changes in community structure in the networks and on increased synchronization between brain regions that form loops in a functional network during motor learning. For the motor-learning data, persistence landscapes also reveal that on average the majority of changes in the network loops take place on the second of the three days of the learning process.},
  file         = {Full Text PDF:/home/dimitri/Zotero/storage/A2BD6EHP/Stolz et al. - 2017 - Persistent homology of time-dependent functional n.pdf:application/pdf;sichaostimeseries-april2017-corrected-v4-4.pdf:/home/dimitri/Zotero/storage/2W4IQ5TQ/sichaostimeseries-april2017-corrected-v4-4.pdf:application/pdf}
}
% Schaub et al. (2016), graph partitions and cluster synchronization -- Chaos 26(9).
@article{schaub_graph_2016,
  author       = {Schaub, Michael T. and O'Clery, Neave and Billeh, Yazan N. and Delvenne, Jean-Charles and Lambiotte, Renaud and Barahona, Mauricio},
  title        = {Graph partitions and cluster synchronization in networks of oscillators},
  journaltitle = {Chaos: An Interdisciplinary Journal of Nonlinear Science},
  shortjournal = {Chaos},
  volume       = {26},
  number       = {9},
  pages        = {094821},
  date         = {2016-08-19},
  issn         = {1054-1500},
  doi          = {10.1063/1.4961065},
  url          = {http://aip.scitation.org/doi/full/10.1063/1.4961065},
  urldate      = {2018-02-13},
  abstract     = {Synchronization over networks depends strongly on the structure of the coupling between the oscillators. When the coupling presents certain regularities, the dynamics can be coarse-grained into clusters by means of External Equitable Partitions of the network graph and their associated quotient graphs. We exploit this graph-theoretical concept to study the phenomenon of cluster synchronization, in which different groups of nodes converge to distinct behaviors. We derive conditions and properties of networks in which such clustered behavior emerges and show that the ensuing dynamics is the result of the localization of the eigenvectors of the associated graph Laplacians linked to the existence of invariant subspaces. The framework is applied to both linear and non-linear models, first for the standard case of networks with positive edges, before being generalized to the case of signed networks with both positive and negative interactions. We illustrate our results with examples of both signed and unsigned graphs for consensus dynamics and for partial synchronization of oscillator networks under the master stability function as well as Kuramoto oscillators.},
  file         = {Full Text PDF:/home/dimitri/Zotero/storage/QDQY8L8M/Schaub et al. - 2016 - Graph partitions and cluster synchronization in ne.pdf:application/pdf;Snapshot:/home/dimitri/Zotero/storage/JP2SXD5G/1.html:text/html}
}
% Tabourier, Libert and Lambiotte (2016), link prediction in ego-networks -- EPJ Data Science 5(1).
% The abstract had lost the possessive apostrophe in "ego's neighbors"; it is restored here.
@article{tabourier_predicting_2016,
  author       = {Tabourier, Lionel and Libert, Anne-Sophie and Lambiotte, Renaud},
  title        = {Predicting links in ego-networks using temporal information},
  journaltitle = {{EPJ} Data Science},
  volume       = {5},
  number       = {1},
  pages        = {1},
  date         = {2016-12},
  issn         = {2193-1127},
  doi          = {10.1140/epjds/s13688-015-0062-0},
  url          = {https://epjdatascience.springeropen.com/articles/10.1140/epjds/s13688-015-0062-0},
  urldate      = {2018-02-13},
  rights       = {2016 Tabourier et al.},
  abstract     = {Link prediction appears as a central problem of network science, as it calls for unfolding the mechanisms that govern the micro-dynamics of the network. In this work, we are interested in ego-networks, that is the mere information of interactions of a node to its neighbors, in the context of social relationships. As the structural information is very poor, we rely on another source of information to predict links among ego's neighbors: the timing of interactions. We define several features to capture different kinds of temporal information and apply machine learning methods to combine these various features and improve the quality of the prediction. We demonstrate the efficiency of this temporal approach on a cellphone interaction dataset, pointing out features which prove themselves to perform well in this context, in particular the temporal profile of interactions and elapsed time between contacts.},
  file         = {Full Text PDF:/home/dimitri/Zotero/storage/ETM66HPY/Tabourier et al. - 2016 - Predicting links in ego-networks using temporal in.pdf:application/pdf;Snapshot:/home/dimitri/Zotero/storage/IUNKJ9YF/s13688-015-0062-0.html:text/html}
}
% Noulas et al. (2015), open datasets and taxi pricing -- EPJ Data Science 4(1).
% The abstract had lost its possessive apostrophes (Uber's, application's, area's); they are restored here.
@article{noulas_mining_2015,
  author       = {Noulas, Anastasios and Salnikov, Vsevolod and Lambiotte, Renaud and Mascolo, Cecilia},
  title        = {Mining open datasets for transparency in taxi transport in metropolitan environments},
  journaltitle = {{EPJ} Data Science},
  volume       = {4},
  number       = {1},
  pages        = {23},
  date         = {2015-12},
  issn         = {2193-1127},
  doi          = {10.1140/epjds/s13688-015-0060-2},
  url          = {https://epjdatascience.springeropen.com/articles/10.1140/epjds/s13688-015-0060-2},
  urldate      = {2018-02-13},
  rights       = {2015 Noulas et al.},
  abstract     = {Uber has recently been introducing novel practices in urban taxi transport. Journey prices can change dynamically in almost real time and also vary geographically from one area to another in a city, a strategy known as surge pricing. In this paper, we explore the power of the new generation of open datasets towards understanding the impact of the new disruption technologies that emerge in the area of public transport. With our primary goal being a more transparent economic landscape for urban commuters, we provide a direct price comparison between Uber and the Yellow Cab company in New York. We discover that Uber, despite its lower standard pricing rates, effectively charges higher fares on average, especially during short in length, but frequent in occurrence, taxi journeys. Building on this insight, we develop a smartphone application, {OpenStreetCab}, that offers a personalized consultation to mobile users on which taxi provider is cheaper for their journey. Almost five months after its launch, the app has attracted more than three thousand users in a single city. Their journey queries have provided additional insights on the potential savings similar technologies can have for urban commuters, with a highlight being that on average, a user in New York saves 6 U.S. Dollars per taxi journey if they pick the cheapest taxi provider. We run extensive experiments to show how Uber's surge pricing is the driving factor of higher journey prices and therefore higher potential savings for our application's users. Finally, motivated by the observation that Uber's surge pricing is occurring more frequently that intuitively expected, we formulate a prediction task where the aim becomes to predict a geographic area's tendency to surge. Using exogenous to Uber data, in particular Yellow Cab and Foursquare data, we show how it is possible to estimate customer demand within an area, and by extension surge pricing, with high accuracy.},
  file         = {Full Text PDF:/home/dimitri/Zotero/storage/N6P7THVK/Noulas et al. - 2015 - Mining open datasets for transparency in taxi tran.pdf:application/pdf;Snapshot:/home/dimitri/Zotero/storage/H3R7HWMH/s13688-015-0060-2.html:text/html}
}
% Kivelä et al. (2014), Multilayer Networks -- Journal of Complex Networks 2(3); also arXiv 1309.7233.
@article{kivela_multilayer_2014,
  author       = {Kivelä, Mikko and Arenas, Alexandre and Barthelemy, Marc and Gleeson, James P. and Moreno, Yamir and Porter, Mason A.},
  title        = {Multilayer Networks},
  journaltitle = {Journal of Complex Networks},
  volume       = {2},
  number       = {3},
  pages        = {203--271},
  date         = {2014-09-01},
  issn         = {2051-1310, 2051-1329},
  doi          = {10.1093/comnet/cnu016},
  eprinttype   = {arxiv},
  eprint       = {1309.7233},
  url          = {http://arxiv.org/abs/1309.7233},
  urldate      = {2018-02-13},
  abstract     = {In most natural and engineered systems, a set of entities interact with each other in complicated patterns that can encompass multiple types of relationships, change in time, and include other types of complications. Such systems include multiple subsystems and layers of connectivity, and it is important to take such "multilayer" features into account to try to improve our understanding of complex systems. Consequently, it is necessary to generalize "traditional" network theory by developing (and validating) a framework and associated tools to study multilayer systems in a comprehensive fashion. The origins of such efforts date back several decades and arose in multiple disciplines, and now the study of multilayer networks has become one of the most important directions in network science. In this paper, we discuss the history of multilayer networks (and related concepts) and review the exploding body of work on such networks. To unify the disparate terminology in the large body of recent work, we discuss a general framework for multilayer networks, construct a dictionary of terminology to relate the numerous existing concepts to each other, and provide a thorough discussion that compares, contrasts, and translates between related notions such as multilayer networks, multiplex networks, interdependent networks, networks of networks, and many others. We also survey and discuss existing data sets that can be represented as multilayer networks. We review attempts to generalize single-layer-network diagnostics to multilayer networks. We also discuss the rapidly expanding research on multilayer-network models and notions like community structure, connected components, tensor decompositions, and various types of dynamical processes on multilayer networks. We conclude with a summary and an outlook.},
  keywords     = {Physics - Physics and Society, Computer Science - Social and Information Networks},
  file         = {arXiv\:1309.7233 PDF:/home/dimitri/Zotero/storage/F98JFB2E/Kivelä et al. - 2014 - Multilayer Networks.pdf:application/pdf;arXiv.org Snapshot:/home/dimitri/Zotero/storage/7WBJRIBQ/1309.html:text/html}
}
% Porter and Gleeson (2014), Dynamical Systems on Networks: A Tutorial -- arXiv preprint 1403.7663.
% The arXiv identifier lives in the eprint fields only; the fake journal title that repeated it was removed.
@online{porter_dynamical_2014,
  author     = {Porter, Mason A. and Gleeson, James P.},
  title      = {Dynamical Systems on Networks: A Tutorial},
  shorttitle = {Dynamical Systems on Networks},
  date       = {2014-03-29},
  eprinttype = {arxiv},
  eprint     = {1403.7663},
  url        = {http://arxiv.org/abs/1403.7663},
  urldate    = {2018-02-13},
  abstract   = {We give a tutorial for the study of dynamical systems on networks. We focus especially on "simple" situations that are tractable analytically, because they can be very insightful and provide useful springboards for the study of more complicated scenarios. We briefly motivate why examining dynamical systems on networks is interesting and important, and we then give several fascinating examples and discuss some theoretical results. We also briefly discuss dynamical systems on dynamical (i.e., time-dependent) networks, overview software implementations, and give an outlook on the field.},
  keywords   = {Physics - Physics and Society, Computer Science - Social and Information Networks, Condensed Matter - Disordered Systems and Neural Networks, Condensed Matter - Statistical Mechanics, Nonlinear Sciences - Adaptation and Self-Organizing Systems},
  file       = {arXiv\:1403.7663 PDF:/home/dimitri/Zotero/storage/XBRAHARB/Porter and Gleeson - 2014 - Dynamical Systems on Networks A Tutorial.pdf:application/pdf;arXiv.org Snapshot:/home/dimitri/Zotero/storage/LF7GCTFE/1403.html:text/html}
}
% Tierny et al. (2017), The Topology ToolKit -- IEEE Transactions on Visualization and Computer Graphics.
@article{tierny_topology_2017,
  author       = {Tierny, Julien and Favelier, Guillaume and Levine, Joshua and Gueunet, Charles and Michaux, Michael},
  title        = {The Topology {ToolKit}},
  journaltitle = {{IEEE} Transactions on Visualization and Computer Graphics},
  date         = {2017-10-01},
  url          = {https://hal.archives-ouvertes.fr/hal-01499905/document},
  urldate      = {2018-02-15},
  langid       = {english},
  abstract     = {This system paper presents the Topology {ToolKit} ({TTK}), a software platform designed for topological data analysis in scientific visualization. While topological data analysis has gained in popularity over the last two decades, it has not yet been widely adopted as a standard data analysis tool for end users or developers. {TTK} aims at addressing this problem by providing a unified, generic, efficient, and robust implementation of key algorithms for the topological analysis of scalar data, including: critical points, integral lines, persistence diagrams, persistence curves, merge trees, contour trees, Morse-Smale complexes, fiber surfaces, continuous scatterplots, Jacobi sets, Reeb spaces, and more. {TTK} is easily accessible to end users due to a tight integration with {ParaView}. It is also easily accessible to developers through a variety of bindings (Python, {VTK}/C++) for fast prototyping or through direct, dependence-free, C++, to ease integration into pre-existing complex systems. While developing {TTK}, we faced several algorithmic and software engineering challenges, which we document in this paper. In particular, we present an algorithm for the construction of a discrete gradient that complies to the critical points extracted in the piecewise-linear setting. This algorithm guarantees a combinatorial consistency across the topological abstractions supported by {TTK}, and importantly, a unified implementation of topological data simplification for multi-scale exploration and analysis. We also present a cached triangulation data structure, that supports time efficient and generic traversals, which self-adjusts its memory usage on demand for input simplicial meshes and which implicitly emulates a triangulation for regular grids with no memory overhead. 
Finally, we describe an original software architecture, which guarantees memory efficient and direct accesses to {TTK} features, while still allowing for researchers powerful and easy bindings and extensions. {TTK} is open source ({BSD} license) and its code, online documentation and video tutorials are available on {TTK}'s website (https://topology-tool-kit.github.io/).},
  file         = {Full Text PDF:/home/dimitri/Zotero/storage/TGURBQBF/Tierny et al. - 2017 - The Topology ToolKit.pdf:application/pdf;Snapshot:/home/dimitri/Zotero/storage/JAIQUA5K/hal-01499905v2.html:text/html}
}
% Maria et al. (2014), The Gudhi Library -- in Mathematical Software ICMS 2014, Lecture Notes in Computer Science.
% The publisher name and the publisher cities are kept in separate fields (publisher, location).
@inproceedings{maria_gudhi_2014,
  author     = {Maria, Clément and Boissonnat, Jean-Daniel and Glisse, Marc and Yvinec, Mariette},
  title      = {The {Gudhi} Library: Simplicial Complexes and Persistent Homology},
  shorttitle = {The {Gudhi} Library},
  booktitle  = {Mathematical Software {ICMS} 2014},
  eventtitle = {International Congress on Mathematical Software},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer},
  location   = {Berlin and Heidelberg},
  pages      = {167--174},
  date       = {2014-08-05},
  isbn       = {978-3-662-44198-5},
  doi        = {10.1007/978-3-662-44199-2_28},
  url        = {https://link.springer.com/chapter/10.1007/978-3-662-44199-2_28},
  urldate    = {2018-02-15},
  langid     = {english},
  abstract   = {We present the main algorithmic and design choices that have been made to represent complexes and compute persistent homology in the Gudhi library. The Gudhi library (Geometric Understanding in Higher Dimensions) is a generic C++ library for computational topology. Its goal is to provide robust, efficient, flexible and easy to use implementations of state-of-the-art algorithms and data structures for computational topology. We present the different components of the software, their interaction and the user interface. We justify the algorithmic and design decisions made in Gudhi and provide benchmarks for the code. The software, which has been developped by the first author, will be available soon at project.inria.fr/gudhi/software/ .},
  file       = {Snapshot:/home/dimitri/Zotero/storage/3YRXLXZL/978-3-662-44199-2_28.html:text/html}
}
% Oudot, INF556 course web page on Topological Data Analysis (undated online resource).
@online{oudot_inf556_????,
  author  = {Oudot, Steve Y.},
  title   = {{INF}556 -- Topological Data Analysis},
  url     = {http://www.enseignement.polytechnique.fr/informatique/INF556/},
  urldate = {2018-02-16},
  file    = {INF556 -- Topological Data Analysis:/home/dimitri/Zotero/storage/TNRU945Q/INF556.html:text/html}
}

BIN
proposal/proposal.pdf Normal file

Binary file not shown.

90
proposal/proposal.tex Normal file
View file

@ -0,0 +1,90 @@
%% MSc project proposal: memoir class in article mode, A4 paper, 11pt.
\documentclass[article,a4paper,11pt,openany,extrafontsizes]{memoir}
%% Packages, fonts, layout and title metadata come from preamble.tex.
\input{preamble}
%% Bibliography: biblatex with the biber backend and the IEEE style.
%% url=false turns off printing of URLs for most entry types; arxiv=abs
%% makes arXiv eprint links point to the abstract page.
\usepackage[backend=biber,style=ieee,url=false,arxiv=abs]{biblatex}
\addbibresource{proposal.bib}
%% memoir command: tighter vertical spacing between list items.
\tightlists%
\begin{document}
\maketitle
% \subsection*{Title}
% Topological Data Analysis of Time-dependent Networks
\subsection*{Supervisors}
Dr Heather Harrington (Mathematical Institute) and Dr Gesine Reinert
(Department of Statistics)
\subsection*{Description}
Topological Data Analysis (TDA)~\cite{chazal_introduction_2017,
oudot_persistence_2015, carlsson_topology_2009,
edelsbrunner_computational_2010} is a family of techniques gaining
increasing importance in the analysis and visualization of
high-dimensional data in machine learning applications.
In this project, we will apply TDA techniques and persistent homology
to time-dependent networks, in order to understand how the topological
structure evolves over time in complex multilayer
networks~\cite{kivela_multilayer_2014, porter_dynamical_2014}.
There are two ways of obtaining time-dependent networks. The first is
to use network data directly, which is readily available in many
contexts: social networks and biological processes are two examples of
systems that evolve over time and can be modelled as graphs. For
instance, in social networks, links in ego
networks have already been studied in the context of
time-dependency~\cite{tabourier_predicting_2016}.
The other large category is time series. It is possible to use a
similarity measure to build a network from a set of time series taken
from the same physical process. Although it could be applied to any
set of time series, this has already been studied in the case of
coupled oscillators (such as Kuramoto
oscillators)~\cite{stolz_persistent_2017, schaub_graph_2016}. It is
thus easy to find relevant datasets or to generate interesting data
from physical simulations.
It is then possible to apply existing TDA and persistent homology
techniques to the networks, taking into account the temporal
dimension. Certain methods have already been implemented in
topological data analysis libraries~\cite{tierny_topology_2017,
maria_gudhi_2014}, although they would have to be adapted to network
data, and applied repeatedly to each time step. There is also a wide
range of methods to explore, from the choice of the similarity
measure, to the choice of filtration (in order to build a simplicial
complex on the network), to the representation of topological
structure. Each of these choices has a great influence on the final
interpretation of the data, and may need to be adapted to each system.
\subsection*{Prerequisite courses/knowledge}
\begin{itemize}
\item SM7 Probability and Statistics for Network Analysis
\item Topological Data Analysis and Persistent
Homology\footnote{\url{http://www.enseignement.polytechnique.fr/informatique/INF556/}}
\end{itemize}
\subsection*{Computing required?}
Yes
\subsection*{Data available?}
Yes
%%\nocite{*}
% \bibliographystyle{ieeetr}
% \bibliography{proposal}
\printbibliography%
\end{document}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: