Coagulated neural network with different dimensions

I am trying to create a deep CNN that can classify every single pixel in an image. I am replicating the architecture from the image below taken from this article. The article mentions that deconvolutions are used so that any input size is possible. This can be seen in the image below.

Github repository

enter image description here

I have currently hard-coded my model for receiving 32x32x7 images, but I would like to accept any input size. What changes do I need to make to my code to accept variable sized input?

 x = tf.placeholder(tf.float32, shape=[None, 32*32*7])
 y_ = tf.placeholder(tf.float32, shape=[None, 32*32*7, 3])
 ...
 DeConnv1 = tf.nn.conv3d_transpose(layer1, filter = w, output_shape = [1,32,32,7,1], strides = [1,2,2,2,1], padding = 'SAME')
 ...
 final = tf.reshape(final, [1, 32*32*7])
 W_final = weight_variable([32*32*7,32*32*7,3])
 b_final = bias_variable([32*32*7,3])
 final_conv = tf.tensordot(final, W_final, axes=[[1], [1]]) + b_final
+7
2

Tensorflow (aka None) . , , .

...

x = tf.placeholder(tf.float32, shape=[None, N*M*P])
y_ = tf.placeholder(tf.float32, shape=[None, N*M*P, 3])
...
x_image = tf.reshape(x, [-1, N, M, P, 1])

...

# Nearly all dimensions are dynamic
x_image = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
label = tf.placeholder(tf.float32, shape=[None, None, 3])

5D, 5D x_image . label , , x_image.

, tf.nn.conv3d_transpose , . :

# Hard-coded output shape
DeConnv1 = tf.nn.conv3d_transpose(layer1, w, output_shape=[1,32,32,7,1], ...)

... :

# Dynamic output shape
DeConnv1 = tf.nn.conv3d_transpose(layer1, w, output_shape=tf.shape(x_image), ...)

, x_image , x_image .

, x_image (?,?,?,?, 1).

, , . , .

, Springenberg at al FC CONV " : ". 3 1x1x1 (. ):

final_conv = conv3d_s1(final, weight_variable([1, 1, 1, 1, 3]))
y = tf.reshape(final_conv, [-1, 3])

, final , DeConnv1 ( ), y , : [-1, N * M * P, 3].

, deconvolutions , . , , . : , .

:

sess = tf.InteractiveSession()

def conv3d_dilation(tempX, tempFilter):
  return tf.layers.conv3d(tempX, filters=tempFilter, kernel_size=[3, 3, 1], strides=1, padding='SAME', dilation_rate=2)

def conv3d(tempX, tempW):
  return tf.nn.conv3d(tempX, tempW, strides=[1, 2, 2, 2, 1], padding='SAME')

def conv3d_s1(tempX, tempW):
  return tf.nn.conv3d(tempX, tempW, strides=[1, 1, 1, 1, 1], padding='SAME')

def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)

def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

def max_pool_3x3(x):
  return tf.nn.max_pool3d(x, ksize=[1, 3, 3, 3, 1], strides=[1, 2, 2, 2, 1], padding='SAME')

x_image = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
label = tf.placeholder(tf.float32, shape=[None, None, 3])

W_conv1 = weight_variable([3, 3, 1, 1, 32])
h_conv1 = conv3d(x_image, W_conv1)
# second convolution
W_conv2 = weight_variable([3, 3, 4, 32, 64])
h_conv2 = conv3d_s1(h_conv1, W_conv2)
# third convolution path 1
W_conv3_A = weight_variable([1, 1, 1, 64, 64])
h_conv3_A = conv3d_s1(h_conv2, W_conv3_A)
# third convolution path 2
W_conv3_B = weight_variable([1, 1, 1, 64, 64])
h_conv3_B = conv3d_s1(h_conv2, W_conv3_B)
# fourth convolution path 1
W_conv4_A = weight_variable([3, 3, 1, 64, 96])
h_conv4_A = conv3d_s1(h_conv3_A, W_conv4_A)
# fourth convolution path 2
W_conv4_B = weight_variable([1, 7, 1, 64, 64])
h_conv4_B = conv3d_s1(h_conv3_B, W_conv4_B)
# fifth convolution path 2
W_conv5_B = weight_variable([1, 7, 1, 64, 64])
h_conv5_B = conv3d_s1(h_conv4_B, W_conv5_B)
# sixth convolution path 2
W_conv6_B = weight_variable([3, 3, 1, 64, 96])
h_conv6_B = conv3d_s1(h_conv5_B, W_conv6_B)
# concatenation
layer1 = tf.concat([h_conv4_A, h_conv6_B], 4)
w = tf.Variable(tf.constant(1., shape=[2, 2, 4, 1, 192]))
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter=w, output_shape=tf.shape(x_image), strides=[1, 2, 2, 2, 1], padding='SAME')

final = DeConnv1
final_conv = conv3d_s1(final, weight_variable([1, 1, 1, 1, 3]))
y = tf.reshape(final_conv, [-1, 3])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y))

print('x_image:', x_image)
print('DeConnv1:', DeConnv1)
print('final_conv:', final_conv)

def try_image(N, M, P, B=1):
  batch_x = np.random.normal(size=[B, N, M, P, 1])
  batch_y = np.ones([B, N * M * P, 3]) / 3.0

  deconv_val, final_conv_val, loss = sess.run([DeConnv1, final_conv, cross_entropy],
                                              feed_dict={x_image: batch_x, label: batch_y})
  print(deconv_val.shape)
  print(final_conv.shape)
  print(loss)
  print()

tf.global_variables_initializer().run()
try_image(32, 32, 7)
try_image(16, 16, 3)
try_image(16, 16, 3, 2)
+5
. none, .

. tf.shape tf.get_shape(). , session.run, , . none, (, None).

, tf.layers.conv2d upconv2d, tf.shape, , , .

, , . , .

-1

All Articles