Tensorflow: batch input queues and then changing the queue source

I have a model that operates on a set of images and calculates some statistics on them - for simplicity, say it just outputs the average image of the set (in practice it does more than that). I have several directories containing images, and I want to get an output for each directory. Each directory contains a variable number of images.

I construct the graph, the output variable, and the loss function once for my script. The inputs are loaded using slightly adapted code from here. I adapted it to take an array of paths, which I supply via a variable-sized placeholder. I got the inspiration for that from here.

Then I iterate over the directories and run the following steps (a rough sketch in code follows the list):

  • Initialize variables (this resets the output variable from the results computed for the previous directory)
  • Set the path variable to the array of file paths from the new directory: sess.run(image_paths.initializer, feed_dict={image_paths_initializer: image_paths})
  • Start the queue runners: queue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  • Run for a number of epochs to get the result
  • Close the threads: coord.request_stop(); coord.join(queue_threads); coord.clear_stop()
  • Return the result, save it, move on to the next directory ...
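
For concreteness, here is a minimal sketch of that loop, assuming the graph has already been built once; directories, get_image_paths, output_op, and num_epochs are stand-ins for my actual code:

coord = tf.train.Coordinator()
for directory in directories:
    # (1) reset the output variable computed for the previous directory
    sess.run(tf.initialize_all_variables())
    # (2) point the input pipeline at the new directory
    sess.run(image_paths.initializer,
             feed_dict={image_paths_initializer: get_image_paths(directory)})
    # (3) start the queue runners
    queue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # (4) run for a number of epochs
    for epoch in range(num_epochs):
        sess.run(output_op)
    # (5) close the threads
    coord.request_stop()
    coord.join(queue_threads)
    coord.clear_stop()
    # (6) save the result and move on to the next directory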

The problem is that when I move on to the second directory, the queue runner threads refuse to start again (they appear to be dead when I inspect queue_threads). That produces errors like:

Compute status: Aborted: FIFOQueue '_1_input_producer' is closed.
Compute status: Aborted: RandomShuffleQueue '_0_shuffle_batch/random_shuffle_queue' is closed.

I have not closed the queues myself (at least not deliberately). If I skip closing the threads instead, they keep serving files from the old directory - they ignore the path assignment op in step (2). Is it not possible to restart the queues like this?

I have tried setting up the queues in a separate session and pulling batches across, but that gives me various CUDA/memory errors. With some debugging stops added, I can get it to run quite far before it breaks, but I don't know whether it is even possible to add control dependencies between ops in different sessions/graphs?

Starting again from scratch for each directory does work (i.e. building a fresh graph and session each time), but initializing CUDA, the graph, and the variables takes ages, which makes it far too slow since I have many directories to process.

Is there any way around these problems, kind SO folk?


string_input_producer is just a FIFOQueue + QueueRunner under the hood. You get more control by using a FIFOQueue directly and enqueueing the filenames manually. Something like this:

import tensorflow as tf

# a plain FIFOQueue of filenames that we enqueue into manually
filename_queue = tf.FIFOQueue(100, tf.string)
enqueue_placeholder = tf.placeholder(dtype=tf.string)
enqueue_op = filename_queue.enqueue(enqueue_placeholder)

config = tf.ConfigProto()
config.operation_timeout_in_ms=2000  # for debugging queue hangs
sess = tf.InteractiveSession(config=config)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)

sess.run([enqueue_op], feed_dict={enqueue_placeholder:"/temp/dir1/0"})
sess.run([enqueue_op], feed_dict={enqueue_placeholder:"/temp/dir1/1"})

# do stats for /temp/dir1

sess.run([enqueue_op], feed_dict={enqueue_placeholder:"/temp/dir2/0"})
sess.run([enqueue_op], feed_dict={enqueue_placeholder:"/temp/dir2/1"})

# do stats for /temp/dir2

coord.request_stop()
coord.join(threads)
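
The point is that nothing in this setup ever closes filename_queue, so switching to a new directory is just a matter of enqueueing different filenames. It is the automatically created queue runners (behind string_input_producer and shuffle_batch) that close their queues once the coordinator stops them.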

Thanks, @yaroslav-bulatov - your answer put me on the right track.

Your approach fixed the filename side, but it was not the whole story. The FIFOQueue of filenames feeds a second queue that shuffle_batch creates behind the scenes, complete with its own queue runner, and once the coordinator stops that runner the queue is closed for good - it cannot be restarted! So I replaced shuffle_batch with a RandomShuffleQueue that I fill manually and keep topped up to capacity during training. Between directories I drain both queues completely, which means no queue runners are needed at all and nothing ever gets closed.

The complete working code is below. Enjoy!

import os
import tensorflow as tf
import numpy as np
from itertools import cycle

output_dir = '/my/output/dir'

my_dirs = [
    [
        '/path/to/datasets/blacksquares/black_square_100x100.png',
        '/path/to/datasets/blacksquares/black_square_200x200.png',
        '/path/to/datasets/blacksquares/black_square_300x300.png'
    ],
    [
        '/path/to/datasets/whitesquares/white_square_100x100.png',
        '/path/to/datasets/whitesquares/white_square_200x200.png',
        '/path/to/datasets/whitesquares/white_square_300x300.png',
        '/path/to/datasets/whitesquares/white_square_400x400.png'
    ],
    [
        '/path/to/datasets/mixedsquares/black_square_200x200.png',
        '/path/to/datasets/mixedsquares/white_square_200x200.png'
    ]
]

# set vars
patch_size = (100, 100, 1)
batch_size = 20
queue_capacity = 1000

# setup filename queue
filename_queue = tf.FIFOQueue(
    capacity=queue_capacity,
    dtypes=tf.string,
    shapes=[[]]
)
filenames_placeholder = tf.placeholder(dtype='string', shape=(None,))  # 1-D list of filenames
filenames_enqueue_op = filename_queue.enqueue_many(filenames_placeholder)

# read file and preprocess
image_reader = tf.WholeFileReader()
key, file = image_reader.read(filename_queue)
uint8image = tf.image.decode_png(file)
cropped_image = tf.random_crop(uint8image, patch_size) # take a random 100x100 crop
float_image = tf.div(tf.cast(cropped_image, tf.float32), 255) # put pixels in the [0,1] range

# setup shuffle batch queue for training images
images_queue = tf.RandomShuffleQueue(
    capacity=queue_capacity,
    min_after_dequeue=0,  # allow queue to become completely empty (as we need to empty it)
    dtypes=tf.float32,
    shapes=patch_size
)
images_enqueue_op = images_queue.enqueue(float_image)
images_dequeue_op = images_queue.dequeue_many(batch_size)  # built once; avoids adding graph ops inside the loop

# setup simple computation - calculate an average image patch
input = tf.placeholder(shape=(None,) + patch_size, dtype=tf.float32)
avg_image = tf.Variable(np.random.normal(loc=0.5, scale=0.5, size=patch_size).astype(np.float32))
loss = tf.nn.l2_loss(tf.sub(avg_image, input))
train_op = tf.train.AdamOptimizer(2.).minimize(loss)

# start session and initialize variables
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())

# note - no need to start any queue runners as I've done away with them

for dir_index, image_paths in enumerate(my_dirs):
    image_paths_cycle = cycle(image_paths)

    # reset the optimisation and training vars
    sess.run(tf.initialize_all_variables())

    num_epochs = 1000
    for i in range(num_epochs):
        # keep the filename queue at capacity
        size = sess.run(filename_queue.size())
        new_paths = []  # note: don't shadow the image_paths loop variable
        while size < queue_capacity:
            new_paths.append(next(image_paths_cycle))
            size += 1
        sess.run(filenames_enqueue_op, feed_dict={filenames_placeholder: new_paths})

        # keep the shuffle batch queue at capacity
        size = sess.run(images_queue.size())
        while size < queue_capacity:
            sess.run([images_enqueue_op])
            size += 1

        # get the next (random) batch of training images
        batch = sess.run(images_dequeue_op)

        # run train op
        _, result, loss_i = sess.run([train_op, avg_image, loss], feed_dict={input: batch})
        print('Iteration {:d}. Loss: {:.2f}'.format(i, loss_i))

        # early stopping :)
        if loss_i < 0.05:
            break

    # empty filename queue and verify empty
    size = sess.run(filename_queue.size())
    sess.run(filename_queue.dequeue_many(size))
    size = sess.run(filename_queue.size())
    assert size == 0

    # empty batch queue and verify empty
    size = sess.run(images_queue.size())
    sess.run(images_queue.dequeue_many(size))
    size = sess.run(images_queue.size())
    assert size == 0

    # save the average image output
    result_image = np.clip(result * 255, 0, 255).astype(np.uint8)
    with open(os.path.join(output_dir, 'result_' + str(dir_index)), 'wb') as result_file:
        result_file.write(tf.image.encode_png(result_image).eval())

print('Happy days!')
exit(0)

As expected, result_0 comes out as a black square, result_1 as a white square, and result_2 as a grey square (the mix of black and white).


Note that you don't need to know the filenames in advance: you can use tf.train.match_filenames_once and feed the result into string_input_producer:

q = tf.train.string_input_producer(
  tf.train.match_filenames_once('/path/to/datasets/*.png')
)

That way you don't have to list the files yourself. I am not sure, though, whether this helps with switching directories on the fly, since the file pattern is fixed when the graph is built.
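
One caveat (an assumption on my part, and it depends on your TF version): match_filenames_once may store its result in a local variable, so if you hit an uninitialized-variable error, initialize local variables as well before starting the queue runners:

sess.run(tf.initialize_all_variables())
sess.run(tf.initialize_local_variables())  # may be needed for match_filenames_once
threads = tf.train.start_queue_runners(sess=sess, coord=coord)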

