This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train(mnist, max_layers):
    """Create the session, optimizer and REINFORCE controller.

    Args:
        mnist: Dataset handle. NOTE(review): not used in the portion of the
            function visible here -- confirm against the full source.
        max_layers: Layer-count limit forwarded to ``Reinforce``.
    """
    sess = tf.Session()
    # Non-trainable step counter that drives the learning-rate schedule.
    global_step = tf.Variable(0, trainable=False)
    # NOTE(review): this local used to be 0.1 and was never read, while the
    # decay call hard-coded 0.99. The effective value (0.99) is kept so the
    # training behaviour does not change; lower it on purpose if 0.1 was the
    # intended starting rate.
    starter_learning_rate = 0.99
    # Staircase schedule: decay rate 0.96 applied every 500 steps.
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate, global_step, 500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    # Use the ``max_layers`` argument instead of reaching for ``args``,
    # which is neither a parameter nor defined in this function.
    reinforce = Reinforce(sess, optimizer, policy_network, max_layers, global_step)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train inside a dedicated session that is closed when the block exits.
with tf.Session() as train_sess:
    init = tf.global_variables_initializer()
    train_sess.run(init)
    for step in range(self.max_step_per_action):
        # ``bathc_size`` (sic) matches the NetManager constructor argument.
        batch_x, batch_y = self.mnist.train.next_batch(self.bathc_size)
        # Placeholder -> value mapping for one training step.
        # NOTE(review): ``feed`` is built but not consumed in the lines
        # visible here; the step that runs it is presumably just below.
        feed = {
            model.X: batch_x,
            model.Y: batch_y,
            model.dropout_keep_prob: self.dropout_rate,
            model.cnn_dropout_rates: cnn_drop_rate,
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_reward(self, action, step, pre_acc):
    """Regroup a flat action vector into per-layer hyperparameter chunks.

    Args:
        action: Nested sequence; ``action[0][0]`` is the flat list of values.
        step: Not used in the lines visible here.
        pre_acc: Not used in the lines visible here.

    Raises:
        IndexError: If the last chunk holds fewer than four values.
    """
    flat_params = action[0][0]
    # Consecutive groups of four values; each group describes one layer.
    action = [flat_params[start:start + 4]
              for start in range(0, len(flat_params), 4)]
    # The fourth value of every group is that layer's dropout rate.
    cnn_drop_rate = [layer[3] for layer in action]
Then we form a batch of hyperparameters for every layer in `action` and create `cnn_drop_rate`, a list of dropout rates for every layer.
Now let's create a new CNN with the new architecture:
# Build every candidate network in its own fresh graph.
with tf.Graph().as_default() as g:
    # Resource container named after the current step.
    with g.container('experiment'+str(step)):
        model = CNN(self.num_input, self.num_classes, action)
        # Reduce the model's loss tensor to its mean.
        loss_op = tf.reduce_mean(model.loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetManager():
    """Holds the dataset and training settings used to evaluate a network."""

    def __init__(self, num_input, num_classes, learning_rate, mnist,
                 max_step_per_action=5500,
                 bathc_size=100,
                 dropout_rate=0.85):
        """Store the configuration on the instance.

        Args:
            num_input: Passed to the ``CNN`` constructor as the input size.
            num_classes: Passed to the ``CNN`` constructor as the class count.
            learning_rate: Learning rate handed to the Adam optimizer.
            mnist: Dataset object; batches come from
                ``mnist.train.next_batch``.
            max_step_per_action: Number of training steps per evaluation.
            bathc_size: Batch size (the misspelling is part of the public
                keyword interface and is kept for compatibility).
            dropout_rate: Value fed to the model's ``dropout_keep_prob``
                placeholder -- presumably a keep probability; TODO confirm.
        """
        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist
        # These three were accepted but never stored, although the training
        # loop reads them from ``self``.
        self.max_step_per_action = max_step_per_action
        self.bathc_size = bathc_size
        self.dropout_rate = dropout_rate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def store_rollout(self, state, reward):
    """Record one (state, reward) pair for a later training step.

    Args:
        state: Indexable object; only its first element ``state[0]`` is kept.
        reward: Value appended to the reward buffer unchanged.
    """
    self.reward_buffer.append(reward)
    self.state_buffer.append(state[0])
def train_step(self, steps_count):
    """Run one optimisation step on the most recent rollouts.

    Args:
        steps_count: How many of the latest buffered entries to train on.
            Note that ``0`` selects the whole buffer, because ``seq[-0:]``
            is the full sequence.

    Returns:
        The loss value fetched from the session for this step.
    """
    # Scale the stored states down by ``division_rate`` before feeding them.
    states = np.array(self.state_buffer[-steps_count:]) / self.division_rate
    rewards = self.reward_buffer[-steps_count:]
    _, ls = self.sess.run(
        [self.train_op, self.loss],
        {self.states: states,
         self.discounted_rewards: rewards})
    # The loss used to be fetched and then thrown away; hand it back so
    # callers can log or monitor it.
    return ls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_action(self, state):
    """Return the policy network's predicted action for ``state``.

    Args:
        state: Value fed to the ``states`` placeholder.

    Returns:
        Whatever the session returns for ``self.predicted_action``.
    """
    # NOTE(review): an epsilon-greedy branch keyed on ``self.exploration``
    # used to follow an unconditional ``return`` and could never execute.
    # It is removed so the code shows what actually runs (always the
    # network's prediction). If exploration is wanted, reinstate it
    # deliberately -- and note that ``random.sample(range(1, 35), k)``
    # raises ValueError once ``k = 4 * max_layers`` exceeds 34.
    return self.sess.run(self.predicted_action, {self.states: state})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build this object's graph pieces first so their variables exist.
self.create_variables()
# Collect every global variable registered so far and initialise them all
# in this object's session.
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.sess.run(tf.variables_initializer(var_lists))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_variables(self):
    """Create the state placeholder and wire it into the policy network."""
    with tf.name_scope("model_inputs"):
        # Raw state representation: one row per sample, four values per layer.
        self.states = tf.placeholder(
            tf.float32, [None, self.max_layers*4], name="states")
    with tf.name_scope("predict_actions"):
        # Build the policy network under its own variable scope.
        with tf.variable_scope("policy_network"):
            self.policy_outputs = self.policy_network(self.states, self.max_layers)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Reinforce():
    """Controller state for policy-gradient training of the policy network."""

    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        """Store the collaborators and hyperparameters on the instance.

        Args:
            sess: Session object used to run graph operations.
            optimizer: Optimizer instance kept for building the train op.
            policy_network: Callable invoked as
                ``policy_network(states, max_layers)``.
            max_layers: Layer-count limit; the state width is
                ``4 * max_layers``.
            global_step: Step counter variable, stored as-is.
            division_rate: Divisor applied to stored states before they are
                fed to the network.
            reg_param: Stored as-is; NOTE(review): presumably a
                regularisation weight -- confirm where it is consumed.
            discount_factor: Stored as-is; NOTE(review): presumably the
                reward discount -- confirm where it is consumed.
            exploration: Stored as-is; intended exploration probability.
        """
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate
        # The remaining arguments were accepted but silently dropped, even
        # though methods of this class read e.g. ``self.max_layers`` and
        # ``self.exploration``.
        self.max_layers = max_layers
        self.global_step = global_step
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.exploration = exploration
        # Rollout storage appended to by ``store_rollout``.
        self.reward_buffer = []
        self.state_buffer = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def policy_network(state, max_layers): | |
with tf.name_scope("policy_network"): | |
nas_cell = tf.contrib.rnn.NASCell(4*max_layers) | |
outputs, state = tf.nn.dynamic_rnn( | |
nas_cell, | |
tf.expand_dims(state, -1), | |
dtype=tf.float32 | |
) | |
bias = tf.Variable([0.05]*4*max_layers) | |
outputs = tf.nn.bias_add(outputs, bias) |