How to accumulate and add gradients for asynchronous n-step update of DQNetwork in Tensorflow?

I am trying to implement asynchronous deep reinforcement learning methods, and for one of the steps I need to accumulate the gradient over several steps and then apply it. What is the best way to achieve this in TensorFlow? I got as far as accumulating the gradients in Python, and I don't think this is the fastest way to achieve it (many transfers from TensorFlow to Python and back). Any suggestions are welcome. This is my toy NN code. It does not simulate or calculate anything real; it simply performs the operations that I want to use.

    import numpy as np
    import tensorflow as tf
    from model import *

    graph = tf.Graph()

    with graph.as_default():

        state = tf.placeholder(tf.float32, shape=[None, 80, 80, 1])

        with tf.variable_scope('layer1'):
            W = weight_variable([8, 8, 1, 32])
            variable_summaries(W, "layer1/W")
            b = bias_variable([32])
            variable_summaries(b, "layer1/b")
            h = conv2d(state, W, 4) + b
            activation = tf.nn.relu(h)
            pool1 = max_pool_2x2(activation)

        print(pool1.get_shape())
        pool1 = tf.reshape(pool1, [-1, 3200])

        with tf.variable_scope('readout'):
            W = weight_variable([3200, 3])
            b = bias_variable([3])
            logits = tf.matmul(pool1, W) + b
            variable_summaries(h, "y")

        action_indexes = tf.placeholder(tf.int32, shape=[None], name="action_indexes")

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, action_indexes)

        starter_learning_rate = 1e-6
        global_step = tf.Variable(0, trainable=False)
        # decay every 1000 steps with a base of 0.96:
        learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   10000, 0.96, staircase=True)

        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        gradients_and_variables = optimizer.compute_gradients(loss, tf.trainable_variables())

        discounted_values = tf.placeholder(tf.float32, shape=[None, 1])

        with tf.Session(graph=graph) as s:

            for v in tf.trainable_variables():
                print(v.name, v.dtype, v.get_shape())

            s.run(tf.initialize_all_variables())

            feed_dict = {
                state: np.zeros([1, 80, 80, 1]),
                action_indexes: [1],
            }

            var_to_grad = dict((var.name, grad) for grad, var in gradients_and_variables)
            keys = sorted(var_to_grad.keys())
            print(keys)
            name_to_var = dict((var.name, var) for _, var in gradients_and_variables)

            # accumulate the gradients on the Python side
            for i in range(10):
                gradients = s.run([var_to_grad[k] for k in keys], feed_dict=feed_dict)
                for k, v in zip(keys, gradients):
                    var_to_grad[k] += v

            for k in keys:
                print(var_to_grad[k])

            # apply the accumulated gradients
            s.run(optimizer.apply_gradients((g, name_to_var[v]) for v, g in var_to_grad.items()),
                  feed_dict=feed_dict)

Updated code after @yaroslave's suggestion:

    import numpy as np
    import tensorflow as tf
    from model import *

    graph = tf.Graph()

    with graph.as_default():

        minibatch = 32
        state = tf.placeholder(tf.float32, shape=[minibatch, 80, 80, 1], name="input")

        with tf.variable_scope('layer1'):
            W = weight_variable([8, 8, 1, 32])
            variable_summaries(W, "layer1/W")
            b = bias_variable([32])
            variable_summaries(b, "layer1/b")
            h = conv2d(state, W, 4) + b
            activation = tf.nn.relu(h)
            pool1 = max_pool_2x2(activation)

        print(pool1.get_shape())
        pool1 = tf.reshape(pool1, [-1, 3200])

        with tf.variable_scope('readout'):
            W = weight_variable([3200, 3])
            b = bias_variable([3])
            logits = tf.matmul(pool1, W) + b
            variable_summaries(h, "y")

        action_indexes = tf.placeholder(tf.int32, shape=[minibatch], name="action_indexes")

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, action_indexes)

        starter_learning_rate = 1e-6
        global_step = tf.Variable(0, trainable=False)
        # decay every 1000 steps with a base of 0.96:
        learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   10000, 0.96, staircase=True)

        optimizer = tf.train.RMSPropOptimizer(learning_rate)

        trainable_variables = tf.trainable_variables()
        varname_to_var = dict((v.name, v) for v in trainable_variables)
        keys = sorted(varname_to_var.keys())

        gradients_and_variables = optimizer.compute_gradients(
            loss, [varname_to_var[k] for k in keys])
        var_to_grad = dict((var.name, grad) for grad, var in gradients_and_variables)
        name_to_var = dict((var.name, var) for _, var in gradients_and_variables)

        # save the gradients in memory (one accumulator variable per gradient)
        var_to_ref_grad = {}
        for k in keys:
            grad = var_to_grad[k]
            print(k, grad.get_shape())
            ref = tf.Variable(tf.zeros_like(grad))
            ref = ref.assign_add(grad)
            var_to_ref_grad[k] = ref

        discounted_values = tf.placeholder(tf.float32, shape=[None, 1],
                                           name='discounted_values')

        # control when to apply gradients
        compute_gradients_flag = tf.placeholder(tf.int32, name="compute_gradients")

        def fn1():
            var_grad_list = []
            for k in keys:
                grad = var_to_ref_grad[k]
                var = varname_to_var[k]
                var_grad_list.append((grad, var))
            optimizer.apply_gradients(var_grad_list)
            return tf.no_op()

        fn2 = lambda: tf.no_op()

        last_op = tf.cond(tf.equal(compute_gradients_flag, 1), fn1, fn2)

        with tf.Session(graph=graph) as s:

            feed_dict = {
                state: np.zeros([minibatch, 80, 80, 1]),
                action_indexes: [1] * minibatch,
                compute_gradients_flag: False,
            }

            s.run(tf.initialize_all_variables())

            for i in range(10):
                # accumulate gradients
                s.run(last_op, feed_dict=feed_dict)
reinforcement-learning neural-network tensorflow
2 answers

You don't really have to manually accumulate gradients. You can have TensorFlow accumulate them for you by applying the rollout update as a batch.

    s_list = list_of_states_visited
    a_list = list_of_actions_taken
    R_list = list_of_value_targets

    sess.run(local_net.update, feed_dict={
        local_net.input: s_list,
        local_net.a: a_list,
        local_net.R: R_list})
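This answer assumes a `local_net` object exposing `input`, `a` and `R` placeholders plus an `update` op, which it does not define. Below is a minimal sketch of what such an object might look like; the class name, the one-layer body and the simplified loss are illustrative assumptions, not part of the original answer. The point is that the whole n-step rollout is fed as one batch, so TensorFlow sums the per-step gradients inside a single `sess.run`.

    import tensorflow as tf

    # Hypothetical sketch of a `local_net`-style object (names assumed, not from
    # the answer): feeding the whole rollout as one batch makes TensorFlow sum
    # the per-step gradients internally in a single update.
    class LocalNet(object):
        def __init__(self, num_actions=3, learning_rate=1e-6):
            # one row per step of the n-step rollout
            self.input = tf.placeholder(tf.float32, shape=[None, 80, 80, 1], name="states")
            self.a = tf.placeholder(tf.int32, shape=[None], name="actions")
            self.R = tf.placeholder(tf.float32, shape=[None], name="value_targets")

            # tiny stand-in for the real convolutional body
            flat = tf.reshape(self.input, [-1, 80 * 80])
            W = tf.Variable(tf.truncated_normal([80 * 80, num_actions], stddev=0.01))
            b = tf.Variable(tf.zeros([num_actions]))
            logits = tf.matmul(flat, W) + b

            # -log pi(a|s) per step, weighted by the value targets R and summed
            # over the rollout (a stand-in for the actual n-step loss)
            per_step = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.a)
            loss = tf.reduce_sum(per_step * self.R)

            # a single run of `update` applies the gradient summed over all steps
            self.update = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

With a network built this way, the single `sess.run(local_net.update, ...)` call above performs one parameter update whose gradient is already the sum over the n steps of the rollout.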

Something like this might work to create ops to accumulate gradients, reset accumulated gradients, and apply accumulated gradients (untested!):

    def build_gradient_accumulators(optimizer, gradients_and_variables):
        accum_grads_and_vars = []
        accumulators = []
        resetters = []

        for grad, var in gradients_and_variables:
            # one non-trainable accumulator variable per gradient
            accum = tf.Variable(tf.zeros_like(grad), trainable=False)
            # op that adds the current gradient to the accumulator
            accumulators.append(accum.assign_add(grad))
            # apply_gradients reads the accumulated value from the variable
            accum_grads_and_vars.append((accum, var))
            # op that zeroes the accumulator again
            resetters.append(tf.assign(accum, tf.zeros_like(accum)))

        reset_op = tf.group(*resetters)
        accum_op = tf.group(*accumulators)
        apply_op = optimizer.apply_gradients(accum_grads_and_vars)
        return reset_op, accum_op, apply_op
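For completeness, here is a minimal usage sketch (equally untested), with a dummy linear model standing in for the question's DQN; `n_steps` and all other names are illustrative assumptions: run `accum_op` once per step, then `apply_op` once per rollout, then `reset_op`.

    import numpy as np
    import tensorflow as tf

    graph = tf.Graph()
    with graph.as_default():
        # dummy model: the question's conv net, loss and RMSProp would go here
        x = tf.placeholder(tf.float32, shape=[None, 4])
        y = tf.placeholder(tf.float32, shape=[None, 1])
        w = tf.Variable(tf.zeros([4, 1]))
        loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

        optimizer = tf.train.GradientDescentOptimizer(0.1)
        grads_and_vars = optimizer.compute_gradients(loss, tf.trainable_variables())
        reset_op, accum_op, apply_op = build_gradient_accumulators(optimizer, grads_and_vars)
        init_op = tf.initialize_all_variables()

    n_steps = 5  # length of the n-step rollout (assumed)

    with tf.Session(graph=graph) as sess:
        sess.run(init_op)
        sess.run(reset_op)  # start from zeroed accumulators
        for step in range(n_steps):
            feed = {x: np.random.randn(1, 4).astype(np.float32),
                    y: np.random.randn(1, 1).astype(np.float32)}
            sess.run(accum_op, feed_dict=feed)  # add this step's gradient
        sess.run(apply_op)  # one parameter update from the summed gradients
        sess.run(reset_op)  # clear the accumulators for the next rollout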
