Skip to content

Instantly share code, notes, and snippets.

View d0znpp's full-sized avatar

Ivan Novikov d0znpp

View GitHub Profile
def policy_network(state, max_layers):
    """Build the controller RNN and return its output at the last time step.

    Args:
        state: Float tensor fed to the RNN. A trailing axis of size 1 is
            appended before it is handed to ``tf.nn.dynamic_rnn``. The caller
            in this file passes a ``[None, 4 * max_layers]`` placeholder.
        max_layers: Number of layers being searched. The NAS cell and the
            bias vector both have ``4 * max_layers`` entries.

    Returns:
        The biased RNN output of the final time step with the time axis kept,
        i.e. ``outputs[:, -1:, :]``.
    """
    with tf.name_scope("policy_network"):
        nas_cell = tf.contrib.rnn.NASCell(4 * max_layers)
        # The final RNN state is not needed here; only the per-step outputs are.
        outputs, _final_state = tf.nn.dynamic_rnn(
            nas_cell,
            tf.expand_dims(state, -1),
            dtype=tf.float32,
        )
        # One bias entry (initialised to 0.05) per output unit.
        bias = tf.Variable([0.05] * 4 * max_layers)
        outputs = tf.nn.bias_add(outputs, bias)
        # NOTE(review): the excerpt computed `outputs` but returned nothing,
        # although Reinforce.create_variables stores this function's result.
        # The last-step slice keeps a rank-3 (batch, 1, 4 * max_layers) shape,
        # which matches how actions are built and indexed elsewhere in this
        # file (`action[0][0]`) -- confirm against the full source.
        return outputs[:, -1:, :]
# Controller that runs a policy network through a TensorFlow session, buffers
# (state, reward) rollouts and performs training steps on them.
# NOTE(review): this excerpt has lost its indentation and several statements
# appear to be elided; the comments below describe only what the visible lines do.
class Reinforce():
# Constructor: keeps references to the session, the optimizer, the
# policy-network callable and the division rate.
# NOTE(review): max_layers, exploration, reward_buffer and state_buffer are read
# by the methods below but are not assigned in the visible lines -- confirm they
# are set in the full file.
def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
division_rate=100.0,
reg_param=0.001,
discount_factor=0.99,
exploration=0.3):
self.sess = sess
self.optimizer = optimizer
self.policy_network = policy_network
self.division_rate = division_rate
# Creates the input placeholder and wires it into the policy network.
def create_variables(self):
with tf.name_scope("model_inputs"):
# raw state representation
# float32 placeholder with 4 * max_layers values per row; the row count is left open (None).
self.states = tf.placeholder(tf.float32, [None, self.max_layers*4], name="states")
with tf.name_scope("predict_actions"):
# initialize policy network
with tf.variable_scope("policy_network"):
# Calls the callable stored by __init__ with the placeholder and max_layers.
self.policy_outputs = self.policy_network(self.states, self.max_layers)
# NOTE(review): these three statements follow create_variables in this excerpt,
# but calling self.create_variables() from inside itself would recurse;
# presumably they are the tail of __init__ -- confirm against the full file.
# They build the graph, then initialise every variable in the GLOBAL_VARIABLES
# collection through the session.
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.sess.run(tf.variables_initializer(var_lists))
# Evaluates self.predicted_action in the session with `state` fed into self.states.
# NOTE(review): if the lines below all belong to get_action, the unconditional
# return on its first line makes the exploration branch unreachable -- confirm
# whether exploration was disabled on purpose.
def get_action(self, state):
return self.sess.run(self.predicted_action, {self.states: state})
if random.random() < self.exploration:
# random.sample draws 4 * max_layers distinct integers from 1..34, so it
# raises ValueError when 4 * max_layers exceeds 34.
return np.array([[random.sample(range(1, 35), 4*self.max_layers)]])
else:
return self.sess.run(self.predicted_action, {self.states: state})
# Appends the reward and the first element of `state` to their buffers.
def store_rollout(self, state, reward):
self.reward_buffer.append(reward)
self.state_buffer.append(state[0])
# Runs one training step on the most recent `steps_count` buffered entries.
def train_step(self, steps_count):
# The last `steps_count` states, divided element-wise by division_rate.
states = np.array(self.state_buffer[-steps_count:])/self.division_rate
# The matching rewards, fed unchanged into self.discounted_rewards below.
rewars = self.reward_buffer[-steps_count:]
# Evaluates train_op and loss together; `ls` receives the loss value.
# NOTE(review): no return statement is visible in this excerpt.
_, ls = self.sess.run([self.train_op, self.loss],
{self.states: states,
self.discounted_rewards: rewars})
# Holds the dataset and training settings used to score a candidate architecture.
# NOTE(review): this excerpt has lost its indentation and is interrupted by
# article text; the comments describe only what the visible lines do.
class NetManager():
# Constructor: stores num_input, num_classes, learning_rate and mnist.
# NOTE(review): max_step_per_action, bathc_size and dropout_rate are read as
# attributes later in this file but are not assigned in the visible lines --
# confirm they are stored in the full file.
def __init__(self, num_input, num_classes, learning_rate, mnist,
max_step_per_action=5500,
bathc_size=100,
dropout_rate=0.85):
self.num_input = num_input
self.num_classes = num_classes
self.learning_rate = learning_rate
self.mnist = mnist
# NOTE(review): `pre_acc` is not used in the visible lines.
def get_reward(self, action, step, pre_acc):
# Splits action[0][0] into consecutive groups of four values.
action = [action[0][0][x:x+4] for x in range(0, len(action[0][0]), 4)]
# Takes the value at index 3 of every group; a trailing group shorter than
# four values would raise IndexError here.
cnn_drop_rate = [c[3] for c in action]
Next we split "action" into batches of four hyperparameters, one batch per layer, and build cnn_drop_rate – the list of dropout rates for every layer.
Now let's create a new CNN with the new architecture:
# Continuation of get_reward: builds the candidate CNN in its own graph and
# starts a training loop over MNIST batches.
# NOTE(review): indentation is lost and the rest of the method is not visible here.
# A fresh graph is made the default for everything constructed below.
with tf.Graph().as_default() as g:
# The container name is 'experiment' followed by the `step` argument.
with g.container('experiment'+str(step)):
model = CNN(self.num_input, self.num_classes, action)
loss_op = tf.reduce_mean(model.loss)
# NOTE(review): the optimizer is created here, but no minimize/train op is
# visible in this excerpt.
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
with tf.Session() as train_sess:
init = tf.global_variables_initializer()
train_sess.run(init)
# NOTE(review): the loop variable reuses the name of get_reward's `step` parameter.
for step in range(self.max_step_per_action):
batch_x, batch_y = self.mnist.train.next_batch(self.bathc_size)
# Feed dict: the batch, self.dropout_rate as the model's dropout_keep_prob,
# and the cnn_drop_rate list computed earlier in get_reward.
# NOTE(review): `feed` is not consumed in the visible lines.
feed = {model.X: batch_x,
model.Y: batch_y,
model.dropout_keep_prob: self.dropout_rate,
model.cnn_dropout_rates: cnn_drop_rate}
# Sets up the session, learning-rate schedule, optimizer and Reinforce controller.
# NOTE(review): indentation is lost and the function appears to continue past
# the visible lines; `mnist` is not used in the part shown here.
def train(mnist, max_layers):
sess = tf.Session()
# Non-trainable step counter that drives the learning-rate decay.
global_step = tf.Variable(0, trainable=False)
# NOTE(review): starter_learning_rate is assigned but never read in the visible
# lines; the schedule below is given the literal 0.99 instead -- confirm which
# initial rate is intended.
starter_learning_rate = 0.1
# Staircase exponential decay: initial rate 0.99, multiplied by 0.96 every 500 steps.
learning_rate = tf.train.exponential_decay(0.99, global_step,
500, 0.96, staircase=True)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
# NOTE(review): this reads the global `args.max_layers` rather than the
# `max_layers` parameter, which is otherwise unused here -- confirm.
reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)