Conrad Lee conradlee

## edgelist2pajek.py
#!/usr/bin/env python
import os
import sys
import subprocess
import optparse
import tempfile

# Special feature: can convert files so large that they
# don't fit in memory. Works for weighted/unweighted,
# directed/undirected edges.

## simple_edgelist_parser.py
def read_edgelist(in_filename):
    """Read the whole edge list stored in *in_filename* into memory.

    The file is opened in text mode and passed to ``edge_generator``;
    every edge it yields is collected, in file order, into a list.

    Returns:
        A list containing one item per edge produced by
        ``edge_generator``.
    """
    with open(in_filename) as edge_file:
        edges = list(edge_generator(edge_file))
    return edges

def edge_generator(f):
    """Lazily parse weighted edges from an iterable of text lines.

    Each non-blank line must hold exactly three whitespace-separated
    fields: the two node ids followed by the edge weight.

    Args:
        f: An iterable of strings, e.g. an open text file.

    Yields:
        ``(n1, n2, weight)`` tuples where ``n1`` and ``n2`` are ``int``
        and ``weight`` is ``float``.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            fields, or a field cannot be converted to a number.
    """
    for line in f:
        # split() with no argument already discards the trailing newline
        # and any surrounding whitespace.
        fields = line.split()
        if not fields:
            # Blank lines (commonly a trailing newline at the end of the
            # file) carry no edge; skip them instead of failing to unpack.
            continue
        n1, n2, weight = fields
        yield int(n1), int(n2), float(weight)

## mmap_edgelist_parser.py
import numpy
import subprocess

weighted_edge_dtype = [("n1", numpy.uint32),("n2", numpy.uint32),("weight", numpy.float64)]

def convert_edgelist_to_mmap(in_filename):
    # First determine number of edges because we will need to
    # pre-allocate memmap object and that action requires a size

    # Use unix's wc (WordCount) to count lines because it is

## clique_percolation.py
import networkx as nx
from itertools import combinations

def get_percolated_cliques(G, k):
    perc_graph = nx.Graph()
    cliques = list(frozenset(c) for c in nx.find_cliques(G) if len(c) >= k)
    perc_graph.add_nodes_from(cliques)

    # Add an edge in the clique graph for each pair of cliques that percolate
    for c1, c2 in combinations(cliques, 2):

## clique_percolation_indexed.py
import networkx as nx
from collections import defaultdict

def get_percolated_cliques(G, k):
    perc_graph = nx.Graph()
    cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
    perc_graph.add_nodes_from(cliques)

    # First index which nodes are in which cliques
    membership_dict = defaultdict(list)

## clique_percolation_networkx.py
import networkx as nx
from collections import defaultdict
from itertools import combinations

def get_percolated_cliques(G, k, cliques=None):
    """
    Finds k-percolated cliques in G, e.g,

    Unless the cliques argument evaluates to True, this algorithm
    first enumerates all cliques in G. These are stored in memory,

## mean_shift.py
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.utils import extmath

# For the full-blown implementation, see www.scikit-learn.org

def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=300):
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    cluster_centers = []

## kernel_update_functions.py
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

def gaussian_kernel_update(x, points, bandwidth):
    """Return the Gaussian-weighted mean of *points* around *x*.

    Every point is weighted by ``exp(-(d / bandwidth) ** 2)``, where ``d``
    is its Euclidean distance to ``x``; the result is the weighted sum of
    the points divided by the sum of the weights.

    NOTE(review): assumes ``points`` is 2-D (one row per point) and ``x``
    is a single 2-D row accepted by ``euclidean_distances`` - confirm
    against the caller.
    """
    dist_to_x = euclidean_distances(points, x)
    kernel_weights = np.exp(-1 * (dist_to_x ** 2 / bandwidth ** 2))
    # Weight each row of points, then collapse the rows into one vector.
    weighted_total = np.sum(points * kernel_weights, axis=0)
    return weighted_total / np.sum(kernel_weights)

def flat_kernel_update(x, points, bandwidth):
    """Return the unweighted mean of *points* taken along axis 0.

    ``x`` and ``bandwidth`` are not used by this function; presumably they
    are accepted so it can be called with the same arguments as
    ``gaussian_kernel_update``.
    """
    centroid = np.mean(points, axis=0)
    return centroid

## bin_points.py
import numpy as np
from collections import defaultdict

def bin_points(X, bin_size, min_bin_freq):
    bin_sizes = defaultdict(int)
    for point in X:
        binned_point = np.cast[np.int32](point / bin_size)
        bin_sizes[tuple(binned_point)] += 1

    bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if freq >= min_bin_freq], dtype=np.float32)

## mysql-utf8.txt
mysql> show variables like '%character%';
+--------------------------+----------------------------+
| Variable_name            | Value                      |
+--------------------------+----------------------------+
| character_set_client     | utf8                       |
| character_set_connection | utf8                       |
| character_set_database   | utf8                       |
| character_set_filesystem | binary                     |
| character_set_results    | utf8                       |
| character_set_server     | utf8                       |
	#!/usr/bin/env python
	import os
	import sys
	import subprocess
	import optparse
	import tempfile

	# Special feature: can convert files so large that they
	# don't fit in memory. Works for weighted/unweighted,
	# directed/undirected edges.
	def read_edgelist(in_filename):
	with open(in_filename) as f:
	return [edge for edge in edge_generator(f)]

	def edge_generator(f):
	for line in f:
	n1, n2, weight = line.rstrip("\n").split()
	yield int(n1), int(n2), float(weight)
	import numpy
	import subprocess

	weighted_edge_dtype = [("n1", numpy.uint32),("n2", numpy.uint32),("weight", numpy.float64)]

	def convert_edgelist_to_mmap(in_filename):
	# First determine number of edges because we will need to
	# pre-allocate memmap object and that action requires a size

	# Use unix's wc (WordCount) to count lines because it is
	import networkx as nx
	from itertools import combinations

	def get_percolated_cliques(G, k):
	perc_graph = nx.Graph()
	cliques = list(frozenset(c) for c in nx.find_cliques(G) if len(c) >= k)
	perc_graph.add_nodes_from(cliques)

	# Add an edge in the clique graph for each pair of cliques that percolate
	for c1, c2 in combinations(cliques, 2):
	import networkx as nx
	from collections import defaultdict

	def get_percolated_cliques(G, k):
	perc_graph = nx.Graph()
	cliques = [frozenset(c) for c in nx.find_cliques(G) if len(c) >= k]
	perc_graph.add_nodes_from(cliques)

	# First index which nodes are in which cliques
	membership_dict = defaultdict(list)
	import numpy as np
	from sklearn.neighbors import BallTree
	from sklearn.utils import extmath

	# For the full-blown implementation, see www.scikit-learn.org

	def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=300):
	n_points, n_features = X.shape
	stop_thresh = 1e-3 * bandwidth # when mean has converged
	cluster_centers = []
	import numpy as np
	from sklearn.metrics.pairwise import euclidean_distances

	def gaussian_kernel_update(x, points, bandwidth):
	distances = euclidean_distances(points, x)
	weights = np.exp(-1 * (distances ** 2 / bandwidth ** 2))
	return np.sum(points * weights, axis=0) / np.sum(weights)

	def flat_kernel_update(x, points, bandwidth):
	return np.mean(points, axis=0)
	import numpy as np
	from collections import defaultdict

	def bin_points(X, bin_size, min_bin_freq):
	bin_sizes = defaultdict(int)
	for point in X:
	binned_point = np.cast[np.int32](point / bin_size)
	bin_sizes[tuple(binned_point)] += 1

	bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if freq >= min_bin_freq], dtype=np.float32)
	mysql> show variables like '%character%';
	+--------------------------+----------------------------+
	| Variable_name            | Value                      |
	+--------------------------+----------------------------+
	| character_set_client     | utf8                       |
	| character_set_connection | utf8                       |
	| character_set_database   | utf8                       |
	| character_set_filesystem | binary                     |
	| character_set_results    | utf8                       |
	| character_set_server     | utf8                       |