This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import subprocess | |
import optparse | |
import tempfile | |
# Special feature: can convert files so large that they
# don't fit in memory. Works for weighted/unweighted,
# directed/undirected edges.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_edgelist(in_filename):
    """Read a whole edge-list file into memory.

    Opens ``in_filename`` as text, feeds the open file to
    ``edge_generator`` and collects everything it yields.

    Args:
        in_filename: Path of the edge-list file to read.

    Returns:
        A list holding every item produced by ``edge_generator``,
        in file order.
    """
    with open(in_filename) as edge_file:
        # Materialize the generator before the file is closed.
        edges = list(edge_generator(edge_file))
    return edges
def edge_generator(f):
    """Lazily parse weighted edges from an iterable of text lines.

    Each non-blank line must hold exactly three whitespace-separated
    fields: two integer node ids followed by a float weight.  Blank or
    whitespace-only lines (for example a trailing newline at the end of
    the file) are skipped instead of aborting the whole parse.

    Args:
        f: An open text file, or any other iterable of strings.

    Yields:
        ``(n1, n2, weight)`` tuples typed as ``(int, int, float)``.

    Raises:
        ValueError: If a non-blank line does not contain exactly three
            fields, or a field cannot be converted to its numeric type.
            The message names the offending line number.
    """
    for line_number, line in enumerate(f, start=1):
        # split() with no argument already discards the trailing newline
        # and any surrounding whitespace.
        fields = line.split()
        if not fields:
            continue
        try:
            n1, n2, weight = fields
            edge = int(n1), int(n2), float(weight)
        except ValueError as err:
            # Same exception type as before, but say where the input is bad.
            raise ValueError(
                "line %d: expected 'n1 n2 weight', got %r (%s)"
                % (line_number, line, err)
            )
        yield edge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import subprocess | |
weighted_edge_dtype = [("n1", numpy.uint32),("n2", numpy.uint32),("weight", numpy.float64)] | |
def convert_edgelist_to_mmap(in_filename): | |
# First determine number of edges because we will need to | |
# pre-allocate memmap object and that action requires a size | |
# Use unix's wc (WordCount) to count lines because it is |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
from itertools import combinations | |
def get_percolated_cliques(G, k): | |
perc_graph = nx.Graph() | |
cliques = list(frozenset(c) for c in nx.find_cliques(G) if len(c) >= k) | |
perc_graph.add_nodes_from(cliques) | |
# Add an edge in the clique graph for each pair of cliques that percolate | |
for c1, c2 in combinations(cliques, 2): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
from collections import defaultdict | |
def get_percolated_cliques(G, k): | |
perc_graph = nx.Graph() | |
cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k] | |
perc_graph.add_nodes_from(cliques) | |
# First index which nodes are in which cliques | |
membership_dict = defaultdict(list) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
from collections import defaultdict | |
from itertools import combinations | |
def get_percolated_cliques(G, k, cliques=None): | |
""" | |
Finds k-percolated cliques in G, e.g, | |
Unless the cliques argument evaluates to True, this algorithm | |
first enumerates all cliques in G. These are stored in memory, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.neighbors import BallTree | |
from sklearn.utils import extmath | |
# For the full-blown implementation, see www.scikit-learn.org | |
def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=300): | |
n_points, n_features = X.shape | |
stop_thresh = 1e-3 * bandwidth # when mean has converged | |
cluster_centers = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.metrics.pairwise import euclidean_distances | |
def gaussian_kernel_update(x, points, bandwidth):
    """Return the Gaussian-weighted mean of ``points`` around ``x``.

    Every point is weighted by ``exp(-(d / bandwidth) ** 2)``, where
    ``d`` is its Euclidean distance to ``x``; the weighted sum of the
    points is then divided by the total weight.

    Args:
        x: Current position the distances are measured from.
            NOTE(review): presumably a single row of shape
            (1, n_features) so the weights broadcast over ``points``;
            confirm against the caller.
        points: Points to average.
        bandwidth: Scale of the Gaussian kernel.

    Returns:
        The weighted mean of ``points`` taken along axis 0.
    """
    dist_to_x = euclidean_distances(points, x)
    scaled_sq = (dist_to_x ** 2) / (bandwidth ** 2)
    kernel_weights = np.exp(-1 * scaled_sq)
    weighted_total = np.sum(points * kernel_weights, axis=0)
    return weighted_total / np.sum(kernel_weights)
def flat_kernel_update(x, points, bandwidth):
    """Return the unweighted mean of ``points`` along axis 0.

    ``x`` and ``bandwidth`` are accepted but not used; the signature
    mirrors ``gaussian_kernel_update``.

    Args:
        x: Unused.
        points: Points to average.
        bandwidth: Unused.

    Returns:
        ``np.mean(points, axis=0)``.
    """
    centroid = np.mean(points, axis=0)
    return centroid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from collections import defaultdict | |
def bin_points(X, bin_size, min_bin_freq): | |
bin_sizes = defaultdict(int) | |
for point in X: | |
binned_point = np.cast[np.int32](point / bin_size) | |
bin_sizes[tuple(binned_point)] += 1 | |
bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if freq >= min_bin_freq], dtype=np.float32) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mysql> show variables like '%character%'; | |
+--------------------------+----------------------------+ | |
| Variable_name | Value | | |
+--------------------------+----------------------------+ | |
| character_set_client | utf8 | | |
| character_set_connection | utf8 | | |
| character_set_database | utf8 | | |
| character_set_filesystem | binary | | |
| character_set_results | utf8 | | |
| character_set_server | utf8 | |
OlderNewer