@syhw
Last active October 19, 2024 08:20
A simple deep neural network with or w/o dropout in one file.
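
For reference, a minimal usage sketch (not part of the gist itself): it assumes the file below is saved as dnn.py on the import path, and it uses scikit-learn's small digits dataset purely for illustration.

# Minimal usage sketch (assumption: the gist below is saved as dnn.py).
import numpy
from sklearn import datasets, cross_validation
from dnn import DropoutNet, ReLU, LogisticRegression, add_fit_and_score

add_fit_and_score(DropoutNet)  # patch fit() and score() onto the class

digits = datasets.load_digits()
X = numpy.asarray(digits.data, dtype='float32') / 16.  # scale pixels to [0, 1]
y = numpy.asarray(digits.target, dtype='int32')
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2, random_state=42)

dnn = DropoutNet(numpy_rng=numpy.random.RandomState(123),
                 n_ins=X.shape[1],
                 layers_types=[ReLU, ReLU, LogisticRegression],
                 layers_sizes=[200, 200],
                 dropout_rates=[0.2, 0.5, 0.5],
                 n_outs=len(set(y)))
dnn.fit(x_train, y_train, max_epochs=20, method='adadelta', verbose=True)
print("test error: %f" % dnn.score(x_test, y_test))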
"""
A deep neural network with or w/o dropout in one file.
License: Do What The Fuck You Want to Public License http://www.wtfpl.net/
"""
import numpy, theano, sys, math
from theano import tensor as T
from theano import shared
from theano.tensor.shared_randomstreams import RandomStreams
from collections import OrderedDict
BATCH_SIZE = 100
def relu_f(vec):
    """ Wrapper to quickly change the rectified linear unit function """
    return (vec + abs(vec)) / 2.


def dropout(rng, x, p=0.5):
    """ Zero-out random values in x with probability p using rng """
    if p > 0. and p < 1.:
        seed = rng.randint(2 ** 30)
        srng = theano.tensor.shared_randomstreams.RandomStreams(seed)
        mask = srng.binomial(n=1, p=1.-p, size=x.shape,
                             dtype=theano.config.floatX)
        return x * mask
    return x


def fast_dropout(rng, x):
    """ Multiply activations by N(1,1) """
    seed = rng.randint(2 ** 30)
    srng = RandomStreams(seed)
    mask = srng.normal(size=x.shape, avg=1., dtype=theano.config.floatX)
    return x * mask


def build_shared_zeros(shape, name):
    """ Builds a theano shared variable filled with a zeros numpy array """
    return shared(value=numpy.zeros(shape, dtype=theano.config.floatX),
                  name=name, borrow=True)

class Linear(object):
    """ Basic linear transformation layer (W.X + b) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        if W is None:
            W_values = numpy.asarray(rng.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)), dtype=theano.config.floatX)
            W_values *= 4  # This works for sigmoid activated networks!
            W = theano.shared(value=W_values, name='W', borrow=True)
        if b is None:
            b = build_shared_zeros((n_out,), 'b')
        self.input = input
        self.W = W
        self.b = b
        self.params = [self.W, self.b]
        self.output = T.dot(self.input, self.W) + self.b
        if fdrop:
            self.output = fast_dropout(rng, self.output)

    def __repr__(self):
        return "Linear"

class SigmoidLayer(Linear):
    """ Sigmoid activation layer (sigmoid(W.X + b)) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        super(SigmoidLayer, self).__init__(rng, input, n_in, n_out, W, b)
        self.pre_activation = self.output
        if fdrop:
            self.pre_activation = fast_dropout(rng, self.pre_activation)
        self.output = T.nnet.sigmoid(self.pre_activation)


class ReLU(Linear):
    """ Rectified Linear Unit activation layer (max(0, W.X + b)) """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None, fdrop=False):
        if b is None:
            b = build_shared_zeros((n_out,), 'b')
        super(ReLU, self).__init__(rng, input, n_in, n_out, W, b)
        self.pre_activation = self.output
        if fdrop:
            self.pre_activation = fast_dropout(rng, self.pre_activation)
        self.output = relu_f(self.pre_activation)

class DatasetMiniBatchIterator(object):
    """ Basic mini-batch iterator """
    def __init__(self, x, y, batch_size=BATCH_SIZE, randomize=False):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.randomize = randomize
        from sklearn.utils import check_random_state
        self.rng = check_random_state(42)

    def __iter__(self):
        n_samples = self.x.shape[0]
        if self.randomize:
            for _ in xrange(n_samples / BATCH_SIZE):
                if BATCH_SIZE > 1:
                    i = int(self.rng.rand(1) * ((n_samples+BATCH_SIZE-1) / BATCH_SIZE))
                else:
                    i = int(math.floor(self.rng.rand(1) * n_samples))
                yield (i, self.x[i*self.batch_size:(i+1)*self.batch_size],
                       self.y[i*self.batch_size:(i+1)*self.batch_size])
        else:
            for i in xrange((n_samples + self.batch_size - 1)
                            / self.batch_size):
                yield (self.x[i*self.batch_size:(i+1)*self.batch_size],
                       self.y[i*self.batch_size:(i+1)*self.batch_size])

class LogisticRegression:
    """ Multi-class Logistic Regression """
    def __init__(self, rng, input, n_in, n_out, W=None, b=None):
        if W is not None:
            self.W = W
        else:
            self.W = build_shared_zeros((n_in, n_out), 'W')
        if b is not None:
            self.b = b
        else:
            self.b = build_shared_zeros((n_out,), 'b')
        # P(Y|X) = softmax(W.X + b)
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        self.output = self.y_pred
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def negative_log_likelihood_sum(self, y):
        return -T.sum(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def training_cost(self, y):
        """ Wrapper for standard name """
        return self.negative_log_likelihood_sum(y)

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError("y should have the same shape as self.y_pred",
                            ("y", y.type, "y_pred", self.y_pred.type))
        if y.dtype.startswith('int'):
            return T.mean(T.neq(self.y_pred, y))
        else:
            print("!!! y should be of int type")
            return T.mean(T.neq(self.y_pred, numpy.asarray(y, dtype='int')))

class NeuralNet(object):
    """ Neural network (not regularized, without dropout) """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=40*3,
                 layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[1024, 1024, 1024, 1024],
                 n_outs=62 * 3,
                 rho=0.9,
                 eps=1.E-6,
                 max_norm=0.,
                 debugprint=False):
        """
        Basic feedforward neural network.
        """
        self.layers = []
        self.params = []
        self.n_layers = len(layers_types)
        self.layers_types = layers_types
        assert self.n_layers > 0
        self.max_norm = max_norm
        self._rho = rho  # "momentum" for adadelta
        self._eps = eps  # epsilon for adadelta
        self._accugrads = []  # for adadelta
        self._accudeltas = []  # for adadelta
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.x = T.fmatrix('x')
        self.y = T.ivector('y')
        self.layers_ins = [n_ins] + layers_sizes
        self.layers_outs = layers_sizes + [n_outs]
        layer_input = self.x
        for layer_type, n_in, n_out in zip(layers_types,
                                           self.layers_ins, self.layers_outs):
            this_layer = layer_type(rng=numpy_rng,
                                    input=layer_input, n_in=n_in, n_out=n_out)
            assert hasattr(this_layer, 'output')
            self.params.extend(this_layer.params)
            self._accugrads.extend([build_shared_zeros(t.shape.eval(),
                'accugrad') for t in this_layer.params])
            self._accudeltas.extend([build_shared_zeros(t.shape.eval(),
                'accudelta') for t in this_layer.params])
            self.layers.append(this_layer)
            layer_input = this_layer.output
        assert hasattr(self.layers[-1], 'training_cost')
        assert hasattr(self.layers[-1], 'errors')
        # TODO standardize cost
        self.mean_cost = self.layers[-1].negative_log_likelihood(self.y)
        self.cost = self.layers[-1].training_cost(self.y)
        if debugprint:
            theano.printing.debugprint(self.cost)
        self.errors = self.layers[-1].errors(self.y)

    def __repr__(self):
        dimensions_layers_str = map(lambda x: "x".join(map(str, x)),
                                    zip(self.layers_ins, self.layers_outs))
        return "_".join(map(lambda x: "_".join((x[0].__name__, x[1])),
                            zip(self.layers_types, dimensions_layers_str)))

    def get_SGD_trainer(self):
        """ Returns a plain SGD minibatch trainer with learning rate as param. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        # using mean_cost so that the learning rate is not too dependent
        # on the batch size
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            if self.max_norm:
                W = param - gparam * learning_rate
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param - gparam * learning_rate

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn

    def get_adagrad_trainer(self):
        """ Returns an Adagrad (Duchi et al. 2010) trainer using a learning rate. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        learning_rate = T.fscalar('lr')  # learning rate to use
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for accugrad, param, gparam in zip(self._accugrads, self.params, gparams):
            # Adagrad: accumulate squared gradients and scale the step
            # by the inverse square root of this accumulator
            agrad = accugrad + gparam * gparam
            dx = - (learning_rate / T.sqrt(agrad + self._eps)) * gparam
            if self.max_norm:
                W = param + dx
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y),
                                           theano.Param(learning_rate)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn

    def get_adadelta_trainer(self):
        """ Returns an Adadelta (Zeiler 2012) trainer using self._rho and
        self._eps params. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.mean_cost, self.params)
        # compute list of weights updates
        updates = OrderedDict()
        for accugrad, accudelta, param, gparam in zip(self._accugrads,
                self._accudeltas, self.params, gparams):
            # c.f. Algorithm 1 in the Adadelta paper (Zeiler 2012):
            # E[g^2] <- rho * E[g^2] + (1 - rho) * g^2
            agrad = self._rho * accugrad + (1 - self._rho) * gparam * gparam
            # dx = - sqrt(E[dx^2] + eps) / sqrt(E[g^2] + eps) * g
            dx = - T.sqrt((accudelta + self._eps)
                          / (agrad + self._eps)) * gparam
            # E[dx^2] <- rho * E[dx^2] + (1 - rho) * dx^2
            updates[accudelta] = (self._rho * accudelta
                                  + (1 - self._rho) * dx * dx)
            if self.max_norm:
                W = param + dx
                col_norms = W.norm(2, axis=0)
                desired_norms = T.clip(col_norms, 0, self.max_norm)
                updates[param] = W * (desired_norms / (1e-6 + col_norms))
            else:
                updates[param] = param + dx
            updates[accugrad] = agrad

        train_fn = theano.function(inputs=[theano.Param(batch_x),
                                           theano.Param(batch_y)],
                                   outputs=self.mean_cost,
                                   updates=updates,
                                   givens={self.x: batch_x, self.y: batch_y})
        return train_fn

    def score_classif(self, given_set):
        """ Returns functions to get current classification errors. """
        batch_x = T.fmatrix('batch_x')
        batch_y = T.ivector('batch_y')
        score = theano.function(inputs=[theano.Param(batch_x),
                                        theano.Param(batch_y)],
                                outputs=self.errors,
                                givens={self.x: batch_x, self.y: batch_y})

        def scoref():
            """ returned function that scans the entire set given as input """
            return [score(batch_x, batch_y) for batch_x, batch_y in given_set]

        return scoref

class RegularizedNet(NeuralNet):
    """ Neural net with L1 and L2 regularization """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=100,
                 layers_types=[ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[1024, 1024, 1024],
                 n_outs=2,
                 rho=0.9,
                 eps=1.E-6,
                 L1_reg=0.,
                 L2_reg=0.,
                 max_norm=0.,
                 debugprint=False):
        """
        Feedforward neural network with added L1 and/or L2 regularization.
        """
        super(RegularizedNet, self).__init__(numpy_rng, theano_rng, n_ins,
                layers_types, layers_sizes, n_outs, rho, eps, max_norm,
                debugprint)
        L1 = shared(0.)
        for param in self.params:
            L1 += T.sum(abs(param))
        if L1_reg > 0.:
            self.cost = self.cost + L1_reg * L1
        L2 = shared(0.)
        for param in self.params:
            L2 += T.sum(param ** 2)
        if L2_reg > 0.:
            self.cost = self.cost + L2_reg * L2

class DropoutNet(NeuralNet):
    """ Neural net with dropout (see Hinton et al.'s paper) """
    def __init__(self, numpy_rng, theano_rng=None,
                 n_ins=40*3,
                 layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
                 layers_sizes=[4000, 4000, 4000, 4000],
                 dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
                 n_outs=62 * 3,
                 rho=0.9,
                 eps=1.E-6,
                 max_norm=0.,
                 fast_drop=False,
                 debugprint=False):
        """
        Feedforward neural network with dropout regularization.
        """
        super(DropoutNet, self).__init__(numpy_rng, theano_rng, n_ins,
                layers_types, layers_sizes, n_outs, rho, eps, max_norm,
                debugprint)
        self.dropout_rates = dropout_rates
        if fast_drop:
            if dropout_rates[0]:
                dropout_layer_input = fast_dropout(numpy_rng, self.x)
            else:
                dropout_layer_input = self.x
        else:
            dropout_layer_input = dropout(numpy_rng, self.x, p=dropout_rates[0])
        self.dropout_layers = []

        for layer, layer_type, n_in, n_out, dr in zip(self.layers,
                layers_types, self.layers_ins, self.layers_outs,
                dropout_rates[1:] + [0]):  # !!! we do not drop out anything
                                           # from the last layer !!!
            if dr:
                if fast_drop:
                    this_layer = layer_type(rng=numpy_rng,
                            input=dropout_layer_input, n_in=n_in, n_out=n_out,
                            W=layer.W, b=layer.b, fdrop=True)
                else:
                    this_layer = layer_type(rng=numpy_rng,
                            input=dropout_layer_input, n_in=n_in, n_out=n_out,
                            W=layer.W * 1. / (1. - dr),
                            b=layer.b * 1. / (1. - dr))
                    # N.B. dropout with dr == 1 does not drop anything!!
                    this_layer.output = dropout(numpy_rng, this_layer.output, dr)
            else:
                this_layer = layer_type(rng=numpy_rng,
                        input=dropout_layer_input, n_in=n_in, n_out=n_out,
                        W=layer.W, b=layer.b)
            assert hasattr(this_layer, 'output')
            self.dropout_layers.append(this_layer)
            dropout_layer_input = this_layer.output

        assert hasattr(self.layers[-1], 'training_cost')
        assert hasattr(self.layers[-1], 'errors')
        # these are the dropout costs
        self.mean_cost = self.dropout_layers[-1].negative_log_likelihood(self.y)
        self.cost = self.dropout_layers[-1].training_cost(self.y)
        # this is the non-dropout error
        self.errors = self.layers[-1].errors(self.y)

    def __repr__(self):
        return super(DropoutNet, self).__repr__() + "\n"\
            + "dropout rates: " + str(self.dropout_rates)

def add_fit_and_score(class_to_chg):
    """ Mutates a class to add the fit() and score() functions to a NeuralNet. """
    from types import MethodType

    def fit(self, x_train, y_train, x_dev=None, y_dev=None,
            max_epochs=100, early_stopping=True, split_ratio=0.1,
            method='adadelta', verbose=False, plot=False):
        """
        Fits the neural network to `x_train` and `y_train`.
        If `x_dev` or `y_dev` is not given, a `split_ratio` validation
        split is taken from `x_train` and `y_train` (for early stopping).
        """
        import time, copy
        if x_dev is None or y_dev is None:
            from sklearn.cross_validation import train_test_split
            x_train, x_dev, y_train, y_dev = train_test_split(x_train, y_train,
                    test_size=split_ratio, random_state=42)
        if method == 'sgd':
            train_fn = self.get_SGD_trainer()
        elif method == 'adagrad':
            train_fn = self.get_adagrad_trainer()
        elif method == 'adadelta':
            train_fn = self.get_adadelta_trainer()
        train_set_iterator = DatasetMiniBatchIterator(x_train, y_train)
        dev_set_iterator = DatasetMiniBatchIterator(x_dev, y_dev)
        train_scoref = self.score_classif(train_set_iterator)
        dev_scoref = self.score_classif(dev_set_iterator)
        best_dev_loss = numpy.inf
        epoch = 0
        # TODO early stopping (not just cross val, also stop training)
        if plot:
            verbose = True
            self._costs = []
            self._train_errors = []
            self._dev_errors = []
            self._updates = []

        while epoch < max_epochs:
            if not verbose:
                sys.stdout.write("\r%0.2f%%" % (epoch * 100. / max_epochs))
                sys.stdout.flush()
            avg_costs = []
            timer = time.time()
            for x, y in train_set_iterator:
                if method == 'sgd' or method == 'adagrad':
                    # cast the learning rate to float32 so the fscalar 'lr'
                    # input accepts it (see the comments below the gist)
                    avg_cost = train_fn(x, y,
                            lr=numpy.asarray(1.E-2, dtype='float32'))
                    # TODO: you have to play with this learning rate
                    # (dataset dependent)
                elif method == 'adadelta':
                    avg_cost = train_fn(x, y)
                if type(avg_cost) == list:
                    avg_costs.append(avg_cost[0])
                else:
                    avg_costs.append(avg_cost)
            if verbose:
                mean_costs = numpy.mean(avg_costs)
                mean_train_errors = numpy.mean(train_scoref())
                print(' epoch %i took %f seconds' %
                      (epoch, time.time() - timer))
                print(' epoch %i, avg costs %f' %
                      (epoch, mean_costs))
                print(' epoch %i, training error %f' %
                      (epoch, mean_train_errors))
                if plot:
                    self._costs.append(mean_costs)
                    self._train_errors.append(mean_train_errors)
            dev_errors = numpy.mean(dev_scoref())
            if plot:
                self._dev_errors.append(dev_errors)
            if dev_errors < best_dev_loss:
                best_dev_loss = dev_errors
                best_params = copy.deepcopy(self.params)
                if verbose:
                    print('!!! epoch %i, validation error of best model %f' %
                          (epoch, dev_errors))
            epoch += 1
        if not verbose:
            print("")
        for i, param in enumerate(best_params):
            self.params[i] = param

    def score(self, x, y):
        """ error rates """
        iterator = DatasetMiniBatchIterator(x, y)
        scoref = self.score_classif(iterator)
        return numpy.mean(scoref())

    class_to_chg.fit = MethodType(fit, None, class_to_chg)
    class_to_chg.score = MethodType(score, None, class_to_chg)

if __name__ == "__main__":
    add_fit_and_score(DropoutNet)
    add_fit_and_score(RegularizedNet)

    def nudge_dataset(X, Y):
        """
        This produces a dataset 5 times bigger than the original one,
        by moving the 8x8 images in X around by 1px to left, right, down, up
        """
        from scipy.ndimage import convolve
        direction_vectors = [
            [[0, 1, 0],
             [0, 0, 0],
             [0, 0, 0]],
            [[0, 0, 0],
             [1, 0, 0],
             [0, 0, 0]],
            [[0, 0, 0],
             [0, 0, 1],
             [0, 0, 0]],
            [[0, 0, 0],
             [0, 0, 0],
             [0, 1, 0]]]
        shift = lambda x, w: convolve(x.reshape((8, 8)), mode='constant',
                                      weights=w).ravel()
        X = numpy.concatenate([X] +
                              [numpy.apply_along_axis(shift, 1, X, vector)
                               for vector in direction_vectors])
        Y = numpy.concatenate([Y for _ in range(5)], axis=0)
        return X, Y

    from sklearn import datasets, svm, naive_bayes
    from sklearn import cross_validation, preprocessing
    MNIST = True              # MNIST dataset
    DIGITS = False            # digits dataset
    FACES = True              # faces dataset
    TWENTYNEWSGROUPS = False  # 20 newsgroups dataset
    VERBOSE = True  # prints evolution of the loss/accuracy during the fitting
    SCALE = True    # scale the dataset
    PLOT = True     # plot losses and accuracies

    def train_models(x_train, y_train, x_test, y_test, n_features, n_outs,
                     use_dropout=True, n_epochs=100, numpy_rng=None,
                     svms=False, nb=False, deepnn=True, name=''):
        if svms:
            print("Linear SVM")
            classifier = svm.SVC(gamma=0.001)
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))

            print("RBF-kernel SVM")
            classifier = svm.SVC(kernel='rbf', class_weight='auto')
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))

        if nb:
            print("Multinomial Naive Bayes")
            classifier = naive_bayes.MultinomialNB()
            print(classifier)
            classifier.fit(x_train, y_train)
            print("score: %f" % classifier.score(x_test, y_test))

        if deepnn:
            import warnings
            warnings.filterwarnings("ignore")  # TODO remove

            if use_dropout:
                #n_epochs *= 4  TODO
                pass

            def new_dnn(dropout=False):
                if dropout:
                    print("Dropout DNN")
                    return DropoutNet(numpy_rng=numpy_rng, n_ins=n_features,
                        layers_types=[ReLU, ReLU, LogisticRegression],
                        layers_sizes=[200, 200],
                        dropout_rates=[0.2, 0.5, 0.5],
                        # TODO if you have a big enough GPU, use these:
                        #layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
                        #layers_sizes=[2000, 2000, 2000, 2000],
                        #dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
                        n_outs=n_outs,
                        max_norm=4.,
                        fast_drop=True,
                        debugprint=0)
                else:
                    print("Simple (regularized) DNN")
                    return RegularizedNet(numpy_rng=numpy_rng, n_ins=n_features,
                        layers_types=[ReLU, ReLU, LogisticRegression],
                        layers_sizes=[200, 200],
                        n_outs=n_outs,
                        #L1_reg=0.001/x_train.shape[0],
                        #L2_reg=0.001/x_train.shape[0],
                        L1_reg=0.,
                        L2_reg=1./x_train.shape[0],
                        debugprint=0)

            import matplotlib.pyplot as plt
            plt.figure()
            ax1 = plt.subplot(221)
            ax2 = plt.subplot(222)
            ax3 = plt.subplot(223)
            ax4 = plt.subplot(224)  # TODO plot the updates of the weights
            methods = ['sgd', 'adagrad', 'adadelta']
            #methods = ['adadelta']  # TODO if you want "good" results asap
            for method in methods:
                dnn = new_dnn(use_dropout)
                print dnn, "using", method
                dnn.fit(x_train, y_train, max_epochs=n_epochs, method=method,
                        verbose=VERBOSE, plot=PLOT)
                test_error = dnn.score(x_test, y_test)
                print("score: %f" % (1. - test_error))
                ax1.plot(numpy.log10(dnn._costs), label=method)
                ax2.plot(numpy.log10(dnn._train_errors), label=method)
                ax3.plot(numpy.log10(dnn._dev_errors), label=method)
                #ax2.plot(dnn._train_errors, label=method)
                #ax3.plot(dnn._dev_errors, label=method)
                ax4.plot([test_error for _ in range(10)], label=method)
            ax1.set_xlabel('epoch')
            ax1.set_ylabel('cost (log10)')
            ax2.set_xlabel('epoch')
            ax2.set_ylabel('train error')
            ax3.set_xlabel('epoch')
            ax3.set_ylabel('dev error')
            ax4.set_ylabel('test error')
            plt.legend()
            plt.savefig('training_' + name + '.png')

    if MNIST:
        from sklearn.datasets import fetch_mldata
        mnist = fetch_mldata('MNIST original')
        X = numpy.asarray(mnist.data, dtype='float32')
        if SCALE:
            #X = preprocessing.scale(X)
            X /= 255.
        y = numpy.asarray(mnist.target, dtype='int32')
        print("Total dataset size:")
        print("n samples: %d" % X.shape[0])
        print("n features: %d" % X.shape[1])
        print("n classes: %d" % len(set(y)))
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, X.shape[1],
                     len(set(y)), numpy_rng=numpy.random.RandomState(123),
                     name='MNIST')

    if DIGITS:
        digits = datasets.load_digits()
        data = numpy.asarray(digits.data, dtype='float32')
        target = numpy.asarray(digits.target, dtype='int32')
        nudged_x, nudged_y = nudge_dataset(data, target)
        if SCALE:
            nudged_x = preprocessing.scale(nudged_x)
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            nudged_x, nudged_y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, nudged_x.shape[1],
                     len(set(target)), numpy_rng=numpy.random.RandomState(123),
                     name='digits')

    if FACES:
        import logging
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s')
        lfw_people = datasets.fetch_lfw_people(min_faces_per_person=70,
                                               resize=0.4)
        X = numpy.asarray(lfw_people.data, dtype='float32')
        if SCALE:
            X = preprocessing.scale(X)
        y = numpy.asarray(lfw_people.target, dtype='int32')
        target_names = lfw_people.target_names
        print("Total dataset size:")
        print("n samples: %d" % X.shape[0])
        print("n features: %d" % X.shape[1])
        print("n classes: %d" % target_names.shape[0])
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=0.2, random_state=42)
        train_models(x_train, y_train, x_test, y_test, X.shape[1],
                     len(set(y)), numpy_rng=numpy.random.RandomState(123),
                     name='faces')

    if TWENTYNEWSGROUPS:
        from sklearn.feature_extraction.text import TfidfVectorizer
        newsgroups_train = datasets.fetch_20newsgroups(subset='train')
        vectorizer = TfidfVectorizer(encoding='latin-1', max_features=10000)
        #vectorizer = HashingVectorizer(encoding='latin-1')
        x_train = vectorizer.fit_transform(newsgroups_train.data)
        x_train = numpy.asarray(x_train.todense(), dtype='float32')
        y_train = numpy.asarray(newsgroups_train.target, dtype='int32')
        newsgroups_test = datasets.fetch_20newsgroups(subset='test')
        x_test = vectorizer.transform(newsgroups_test.data)
        x_test = numpy.asarray(x_test.todense(), dtype='float32')
        y_test = numpy.asarray(newsgroups_test.target, dtype='int32')
        train_models(x_train, y_train, x_test, y_test, x_train.shape[1],
                     len(set(y_train)),
                     numpy_rng=numpy.random.RandomState(123),
                     svms=False, nb=True, deepnn=True,
                     name='20newsgroups')
@danbri

danbri commented Aug 16, 2014

On OSX,

./dnn.py
Total dataset size:
n samples: 70000
n features: 784
n classes: 10
Dropout DNN
ReLU_784x200_ReLU_200x200_LogisticRegression_200x10
dropout rates: [0.0, 0.5, 0.5] using sgd
Traceback (most recent call last):
  File "./dnn.py", line 684, in <module>
    name='MNIST')
  File "./dnn.py", line 647, in train_models
    dnn.fit(x_train, y_train, max_epochs=n_epochs, method=method, verbose=VERBOSE, plot=PLOT)
  File "./dnn.py", line 490, in fit
    avg_cost = train_fn(x, y, lr=1.E-2) # TODO: you have to
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 516, in __call__
    self[k] = arg
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 452, in __setitem__
    self.value[item] = value
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/compile/function_module.py", line 415, in __setitem__
    s.value = value
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/gof/link.py", line 278, in __set__
    self.storage[0] = self.type.filter(value, **kwargs)
  File "/Library/Python/2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/tensor/type.py", line 152, in filter
    raise TypeError(err_msg, data)
TypeError: ('TensorType(float32, scalar) cannot store accurately value 0.01, it would be represented as 0.00999999977648. If you do not mind this precision loss, you can: 1) explicitly convert your data to a numpy array of dtype float32, or 2) set "allow_input_downcast=True" when calling "function".', 0.01, 'Container name "lr"')

@ddofer

ddofer commented Oct 7, 2014

On Windows 7, 64 bit, Python 3.4:

Traceback (most recent call last):
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 59, in <module>
    raise ImportError()
ImportError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 76, in <module>
    raise ImportError()
ImportError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "E:\Dropbox\Dropbox\BioInformatics Lab\AdaHERF_ML-master\ModdedScripts\dnn.py", line 7, in <module>
    import numpy, theano, sys, math
  File "E:\Anaconda3\lib\site-packages\theano\__init__.py", line 55, in <module>
    from theano.compile import
  File "E:\Anaconda3\lib\site-packages\theano\compile\__init__.py", line 6, in <module>
    from theano.compile.function_module import *
  File "E:\Anaconda3\lib\site-packages\theano\compile\function_module.py", line 18, in <module>
    import theano.compile.mode
  File "E:\Anaconda3\lib\site-packages\theano\compile\mode.py", line 11, in <module>
    import theano.gof.vm
  File "E:\Anaconda3\lib\site-packages\theano\gof\vm.py", line 516, in <module>
    from . import lazylinker_c
  File "E:\Anaconda3\lib\site-packages\theano\gof\lazylinker_c.py", line 85, in <module>
    args = cmodule.GCC_compiler.compile_args()
  File "E:\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line 1603, in compile_args
    native_lines = get_lines("g++ -march=native -E -v -")
  File "E:\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line 1577, in get_lines
    (stdout, stderr) = p.communicate(input='')
  File "E:\Anaconda3\lib\subprocess.py", line 959, in communicate
    stdout, stderr = self._communicate(input, endtime, timeout)
  File "E:\Anaconda3\lib\subprocess.py", line 1195, in _communicate
    self.stdin.write(input)
TypeError: 'str' does not support the buffer interface
[Finished in 6.6s with exit code 1]
[shell_cmd: python -u "E:\Dropbox\Dropbox\BioInformatics Lab\AdaHERF_ML-master\ModdedScripts\dnn.py"]

@dbonadiman

In order to make the code work on OSX, I substituted
lr=1.E-2
with
lr=numpy.asarray(1.E-2, dtype='float32')
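
For reference, both workarounds named in the TypeError above would look roughly like this (a sketch against the gist's fit() and get_SGD_trainer(); only one of the two is needed):

# Option 1 (dbonadiman's fix), at the call site inside fit():
avg_cost = train_fn(x, y, lr=numpy.asarray(1.E-2, dtype='float32'))

# Option 2, when compiling the trainer in get_SGD_trainer() or
# get_adagrad_trainer(): let Theano downcast float inputs to float32.
train_fn = theano.function(inputs=[theano.Param(batch_x),
                                   theano.Param(batch_y),
                                   theano.Param(learning_rate)],
                           outputs=self.mean_cost,
                           updates=updates,
                           givens={self.x: batch_x, self.y: batch_y},
                           allow_input_downcast=True)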
