Source code for flotilla.compute.network

"""
Compute networks (the kind with nodes and edges) on data. Visualize with
:py:mod:`flotilla.visualize.network`.
"""

import networkx as nx
import numpy as np
import pandas as pd

from ..util import memoize
from ..visualize.color import dark2


class Networker(object):
    """Networks (the kind with nodes and edges), aka a graph

    Calculate the edges based on similarity between rows of PCA-reduced data
    """

    # Names accepted by :py:meth:`get_weight_fun`
    weight_funs = ['no_weight', 'sq', 'arctan', 'arctan_sq']

    def __init__(self):
        """Construct a Networker object with default node colors and sizes

        By default every node is colored with the first entry of the
        ``dark2`` palette and drawn at size 300.
        """
        self._default_node_color_mapper = lambda x: dark2[0]
        self._default_node_size_mapper = lambda x: 300

    def get_weight_fun(self, fun_name='no_weight'):
        """Given a string, return the function

        Used to obtain functions that perform common transforms on distance

        Parameters
        ----------
        fun_name : 'no_weight' | 'sq' | 'arctan' | 'arctan_sq', optional
            Name of the function to obtain (default 'no_weight')

        Returns
        -------
        func : function
            A function which transforms a number in the indicated way

        Raises
        ------
        ValueError
            If `fun_name` is not one of the ones indicated above
        """

        def _noweight(x):
            return x

        def _arctan_sq(x):
            return np.arctan(x) ** 2

        transforms = {
            'no_weight': _noweight,
            'sq': np.square,
            'arctan': np.arctan,
            'arctan_sq': _arctan_sq,
        }
        try:
            return transforms[fun_name]
        except (KeyError, TypeError):
            # TypeError covers unhashable names, so that any invalid input
            # still surfaces as the documented ValueError
            raise ValueError(
                'Unknown weight function {!r}, expected one of {}'.format(
                    fun_name, self.weight_funs))

    @memoize
    def adjacency(self, data, use_pc_1=True, use_pc_2=True, use_pc_3=True,
                  use_pc_4=True, n_pcs=5):
        """Calculate the adjacency graph, i.e. connectedness between nodes

        Parameters
        ----------
        data : pandas.DataFrame
            A (n_nodes, n_pcs) sized dataframe of reduced data
        use_pc_1 : bool, optional
            If True, use the first principal component of reduced data
            (default True)
        use_pc_2 : bool, optional
            If True, use the second principal component of reduced data
            (default True)
        use_pc_3 : bool, optional
            If True, use the third principal component of reduced data
            (default True)
        use_pc_4 : bool, optional
            If True, use the fourth principal component of reduced data
            (default True)
        n_pcs : int, optional
            Total number of principal components to use (default 5)

        Returns
        -------
        adjacency : pandas.DataFrame
            A (n_nodes, n_nodes) strictly lower triangular matrix (zeros on
            and above the diagonal) of the covariance between the rows of
            the data, used as edge weights
        """
        total_pcs = data.shape[1]

        # Start with every column, then drop everything past the first n_pcs
        use_cols = np.ones(total_pcs, dtype=bool)
        use_cols[n_pcs:] = False

        # Apply the per-component switches. Only as many switches as there
        # are columns are applied, so data with fewer than four components
        # does not cause a shape mismatch.
        pc_flags = [use_pc_1, use_pc_2, use_pc_3, use_pc_4]
        n_flags = min(len(pc_flags), total_pcs)
        use_cols[:n_flags] &= np.array(pc_flags[:n_flags], dtype=bool)

        subset = data.loc[:, use_cols]

        # np.cov treats each row as a variable, giving node-by-node
        # covariance. It returns a 0-d array for a single row, hence
        # atleast_2d.
        cov = np.atleast_2d(np.cov(subset))

        # k=-1 keeps only the entries below the diagonal, so each pair of
        # nodes appears once and there are no self-edges
        return pd.DataFrame(np.tril(cov, k=-1), index=subset.index,
                            columns=subset.index)

    @memoize
    def graph(self, adjacency, cov_cut=0, node_color_mapper=None,
              node_size_mapper=None, degree_cut=2,
              weight_function='no_weight', name=None):
        """Create a graph based on the adjacency matrix and other inputs

        Parameters
        ----------
        adjacency : pandas.DataFrame
            A (n_nodes, n_nodes) square dataframe of edge weights between
            all nodes in the graph
        cov_cut : float, optional
            Covariance threshold. An edge is only created between two nodes
            whose covariance is strictly greater than this value.
            (default 0)
        node_color_mapper : function, optional
            Function to recolor the nodes for plotting, based on the node
            name. If None, uses the default color mapper set in the
            constructor. (default None)
        node_size_mapper : function, optional
            Function to resize the nodes for plotting, based on the node
            name. If None, defaults to the same size for all nodes.
            (default None)
        degree_cut : int, optional
            Nodes with this many edges or fewer are removed from the graph
            (default 2)
        weight_function : 'no_weight' | 'sq' | 'arctan' | 'arctan_sq', optional
            Weight function of the edges. The lower the weight, the farther
            away two nodes are drawn from each other. (default 'no_weight')
        name : str, optional (default=None)
            For memoization purposes, not used in the function.

        Returns
        -------
        graph : networkx.Graph
            The graph created with all these parameters
        positions : dict
            A {node_name : [x, y]} mapping of all nodes and their x, y
            positions

        Raises
        ------
        ValueError
            If `weight_function` is not a valid weight function name
        """
        if node_color_mapper is None:
            node_color_mapper = self._default_node_color_mapper
        if node_size_mapper is None:
            node_size_mapper = self._default_node_size_mapper

        weight = self.get_weight_fun(weight_function)

        graph = nx.Graph()
        for node_label in adjacency.index:
            graph.add_node(node_label,
                           node_size=node_size_mapper(node_label),
                           node_color=node_color_mapper(node_label))

        for cell1, others in adjacency.iterrows():
            # Series.items() replaces iteritems(), which was removed in
            # pandas 2.0
            for cell2, value in others.items():
                if value > cov_cut:
                    edge_weight = weight(value)
                    # cast to floats because write_gml doesn't like numpy
                    # dtypes
                    graph.add_edge(cell1, cell2,
                                   weight=float(edge_weight),
                                   inv_weight=float(1 / edge_weight),
                                   alpha=0.05)

        # dict() accepts both the plain dict returned by networkx 1.x and
        # the degree view returned by networkx 2.x
        sparse_nodes = [node for node, degree in dict(graph.degree()).items()
                        if degree <= degree_cut]
        graph.remove_nodes_from(sparse_nodes)

        # TODO: can we output this as a (nodes, (x, y)) DataFrame instead?
        positions = nx.spring_layout(graph)
        return graph, positions
Olga B. Botvinnik is funded by the NDSEG fellowship and is a NumFOCUS John Hunter Technology Fellow.
Michael T. Lovci was partially funded by a fellowship from Genentech.
Partially funded by NIH grants NS075449 and HG004659 and CIRM grants RB4-06045 and TR3-05676 to Gene Yeo.