I am training a convolutional neural network in TensorFlow to classify images of buildings into 5 classes.
Training dataset:
Class 1 - 3000 images
Class 2 - 3000 images
Class 3 - 3000 images
Class 4 - 3000 images
Class 5 - 3000 images
I started with a very simple architecture:
Input image - 256 x 256 x 3
Convolutional layer 1 - 128 x 128 x 16 (3x3 filters, 16 filters, stride=2)
Convolutional layer 2 - 64 x 64 x 32 (3x3 filters, 32 filters, stride=2)
Convolutional layer 3 - 32 x 32 x 64 (3x3 filters, 64 filters, stride=2)
Max-pooling layer - 16 x 16 x 64 (2x2 pooling)
Fully-connected layer 1 - 1 x 1024
Fully-connected layer 2 - 1 x 64
Output - 1 x 5
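As a quick sanity check on the sizes above (separate from the training code): with SAME padding each stride-2 convolution halves the spatial size, and the 2x2 max-pooling halves it once more.

# Quick shape check for the simple architecture (SAME padding assumed,
# as in the code further down): stride-2 convolutions halve the size.
size = 256
for stride in (2, 2, 2):              # conv1, conv2, conv3
    size = (size + stride - 1) // stride
    print(size)                       # 128, 64, 32
print(size // 2)                      # 16 after the 2x2 max-pooling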
Other information about my network:
Cost function: tf.nn.softmax_cross_entropy_with_logits
Optimizer: Adam optimizer (Learning rate=0.01, Epsilon=0.1)
Mini-batch size: 5
My cost function starts at a very high value (about 10^10), quickly drops to about 1.6 after several hundred iterations, and then saturates at that value no matter how long I train. The loss on the test set is the same. A loss of about 1.6 corresponds to predicting roughly equal probability for each class, and the network indeed makes the same prediction for every image. The predictions look something like this:
[0.191877 0.203651 0.194455 0.200043 0.203081]
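That plateau value is exactly what a uniform prediction gives: the cross-entropy of [0.2, 0.2, 0.2, 0.2, 0.2] against any one-hot label is -ln(0.2) = ln(5) ≈ 1.609. A quick numpy check (separate from the training code):

import numpy as np

# Cross-entropy of a uniform prediction over 5 classes, for comparison
# with the ~1.6 plateau of my loss.
probs = np.full(5, 0.2)
one_hot = np.array([0., 0., 1., 0., 0.])   # any one-hot label gives the same value
print(-np.sum(one_hot * np.log(probs)))    # 1.6094..., i.e. ln(5)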

Since this did not work, I decided to try a bigger and more complex architecture (loosely based on AlexNet):
Input image - 256 x 256 x 3
Convolutional layer 1 - 64 x 64 x 64 (11x11 filters, 64 filters, stride=4)
Convolutional layer 2 - 32 x 32 x 128 (5x5 filters, 128 filters, stride=2)
Convolutional layer 3 - 16 x 16 x 256 (3x3 filters, 256 filters, stride=2)
Convolutional layer 4 - 8 x 8 x 512 (3x3 filters, 512 filters, stride=2)
Convolutional layer 5 - 8 x 8 x 256 (3x3 filters, 256 filters, stride=1)
Fully-connected layer 1 - 1 x 4096
Fully-connected layer 2 - 1 x 4096
Fully-connected layer 3 - 1 x 4096
Dropout layer (0.5 probability)
Output - 1 x 5
Again, the cost saturates at about 1.6.
My questions:
- Is there something wrong with my architecture (either the simple one or the larger one)?
- Should I simply train for longer? If so, how much longer? The loss does not move at all, however long I train.
- Could the problem be with my data? All of the images are of buildings, so they may look quite similar to each other; perhaps the CNN simply cannot tell the classes apart.
- Could there be a bug in my implementation? My code (with a mini-batch size of 50) is below; I have gone over it several times but cannot find anything obviously wrong.
Here is my code:
import tensorflow as tf

sess = tf.Session()

BATCH_SIZE = 50
MAX_CAPACITY = 300
TRAINING_STEPS = 3001

def read_labeled_image_list(list_filename):
    f = open(list_filename, 'r')
    filenames = []
    labels = []
    for line in f:
        filename, label = line[:-1].split(' ')
        filenames.append(filename)
        labels.append(int(label))
    return filenames, labels
def add_to_batch(image, label):
    image_batch, label_batch = tf.train.batch([image, label], batch_size=BATCH_SIZE, num_threads=1, capacity=MAX_CAPACITY)
    return image_batch, tf.reshape(label_batch, [BATCH_SIZE])

def read_image_with_label(input_queue):
    """ Image """
    file_contents = tf.read_file(input_queue[0])
    example = tf.image.decode_png(file_contents)
    my_image = tf.cast(example, tf.float32)
    my_image = tf.reshape(my_image, [256, 256, 3])
    my_image = my_image/255
    my_mean = tf.reduce_mean(my_image)
    my_image = my_image - my_mean
    """ Label """
    label = input_queue[1] - 1
    return add_to_batch(my_image, label)
def inference(x):
    """ Layer 1: Convolutional """
    W_conv1 = tf.Variable(tf.truncated_normal([11,11,3,64],stddev=0.0001),name='W_conv1')
    b_conv1 = tf.Variable(tf.constant(0.1,shape=[64]),name='b_conv1')
    h_conv1 = tf.nn.relu(tf.nn.conv2d(x,W_conv1,strides=[1,4,4,1],padding='SAME') + b_conv1)

    """ Layer 2: Convolutional """
    W_conv2 = tf.Variable(tf.truncated_normal([5,5,64,128],stddev=0.0001),name='W_conv2')
    b_conv2 = tf.Variable(tf.constant(0.1,shape=[128]),name='b_conv2')
    h_conv2 = tf.nn.relu(tf.nn.conv2d(h_conv1,W_conv2,strides=[1,2,2,1],padding='SAME') + b_conv2)

    """ Layer 3: Convolutional """
    W_conv3 = tf.Variable(tf.truncated_normal([3,3,128,256],stddev=0.0001),name='W_conv3')
    b_conv3 = tf.Variable(tf.constant(0.1,shape=[256]),name='b_conv3')
    h_conv3 = tf.nn.relu(tf.nn.conv2d(h_conv2,W_conv3,strides=[1,2,2,1],padding='SAME') + b_conv3)

    """ Layer 4: Convolutional """
    W_conv4 = tf.Variable(tf.truncated_normal([3,3,256,512],stddev=0.0001),name='W_conv4')
    b_conv4 = tf.Variable(tf.constant(0.1,shape=[512]),name='b_conv4')
    h_conv4 = tf.nn.relu(tf.nn.conv2d(h_conv3,W_conv4,strides=[1,2,2,1],padding='SAME') + b_conv4)

    """ Layer 5: Convolutional """
    W_conv5 = tf.Variable(tf.truncated_normal([3,3,512,256],stddev=0.0001),name='W_conv5')
    b_conv5 = tf.Variable(tf.constant(0.1,shape=[256]),name='b_conv5')
    h_conv5 = tf.nn.relu(tf.nn.conv2d(h_conv4,W_conv5,strides=[1,1,1,1],padding='SAME') + b_conv5)

    """ Layer X: Pooling (currently unused)
    h_pool1 = tf.nn.max_pool(h_conv3,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')"""

    """ Layer 6: Fully-connected """
    W_fc1 = tf.Variable(tf.truncated_normal([8*8*256,4096],stddev=0.0001),name='W_fc1')
    b_fc1 = tf.Variable(tf.constant(0.1,shape=[4096]),name='b_fc1')
    h_conv5_reshaped = tf.reshape(h_conv5,[-1,8*8*256])
    h_fc1 = tf.nn.relu(tf.matmul(h_conv5_reshaped, W_fc1) + b_fc1)

    """ Layer 7: Fully-connected """
    W_fc2 = tf.Variable(tf.truncated_normal([4096,4096],stddev=0.0001),name='W_fc2')
    b_fc2 = tf.Variable(tf.constant(0.1,shape=[4096]),name='b_fc2')
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

    """ Layer 8: Fully-connected """
    W_fc3 = tf.Variable(tf.truncated_normal([4096,4096],stddev=0.0001),name='W_fc3')
    b_fc3 = tf.Variable(tf.constant(0.1,shape=[4096]),name='b_fc3')
    h_fc3 = tf.nn.relu(tf.matmul(h_fc2, W_fc3) + b_fc3)

    """ Layer 9: Dropout layer """
    h_dropout = tf.nn.dropout(h_fc3,0.5)

    """ Readout layer: Softmax """
    W_softmax = tf.Variable(tf.truncated_normal([4096,5],stddev=0.0001),name='W_softmax')
    b_softmax = tf.Variable(tf.constant(0.1,shape=[5]),name='b_softmax')
    y_conv = tf.nn.relu(tf.matmul(h_dropout,W_softmax) + b_softmax)

    """ Summaries """
    tf.histogram_summary('W_conv1',W_conv1)
    tf.histogram_summary('W_conv2',W_conv2)
    tf.histogram_summary('W_conv3',W_conv3)
    tf.histogram_summary('W_conv4',W_conv4)
    tf.histogram_summary('W_conv5',W_conv5)
    tf.histogram_summary('W_fc1',W_fc1)
    tf.histogram_summary('W_fc2',W_fc2)
    tf.histogram_summary('W_fc3',W_fc3)
    tf.histogram_summary('W_softmax',W_softmax)
    tf.histogram_summary('b_conv1',b_conv1)
    tf.histogram_summary('b_conv2',b_conv2)
    tf.histogram_summary('b_conv3',b_conv3)
    tf.histogram_summary('b_conv4',b_conv4)
    tf.histogram_summary('b_conv5',b_conv5)
    tf.histogram_summary('b_fc1',b_fc1)
    tf.histogram_summary('b_fc2',b_fc2)
    tf.histogram_summary('b_fc3',b_fc3)
    tf.histogram_summary('b_softmax',b_softmax)

    return y_conv
def cost_function(y_label, y_conv):
    sparse_labels = tf.reshape(y_label, [BATCH_SIZE, 1])
    indices = tf.reshape(tf.range(BATCH_SIZE), [BATCH_SIZE, 1])
    concated = tf.concat(1, [indices, sparse_labels])
    dense_labels = tf.sparse_to_dense(concated, [BATCH_SIZE, 5], 1.0, 0.0)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, dense_labels))
    y_prob = tf.nn.softmax(y_conv)
    correct_prediction = tf.equal(tf.argmax(dense_labels, 1), tf.argmax(y_prob, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary('loss', cost)
    tf.scalar_summary('accuracy', accuracy)
    return cost, accuracy
def main():
    filename = '/labels/filenames_with_labels_server.txt'
    image_list, label_list = read_labeled_image_list(filename)

    images = tf.convert_to_tensor(image_list, dtype=tf.string)
    labels = tf.convert_to_tensor(label_list, dtype=tf.int32)
    input_queue = tf.train.slice_input_producer([images, labels], shuffle=True, capacity=MAX_CAPACITY)

    image, label = read_image_with_label(input_queue)
    y_conv = inference(image)
    loss, acc = cost_function(label, y_conv)
    train_step = tf.train.AdamOptimizer(learning_rate=0.001, epsilon=0.1).minimize(loss)

    writer = tf.train.SummaryWriter('/SummaryLogs/log', sess.graph)
    merged = tf.merge_all_summaries()
    saver = tf.train.Saver()

    """ Run session """
    sess.run(tf.initialize_all_variables())
    tf.train.start_queue_runners(sess=sess)

    print('Running...')
    for step in range(1, TRAINING_STEPS):
        loss_val, acc_val, _, summary_str = sess.run([loss, acc, train_step, merged])
        writer.add_summary(summary_str, step)
        print "Step %d, Loss %g, Accuracy %g" % (step, loss_val, acc_val)

        if step == 1:
            save_path = saver.save(sess, '/SavedVariables/model', global_step=step)
            print "Initial model saved: %s" % save_path

    save_path = saver.save(sess, '/SavedVariables/model-final')
    print "Final model saved: %s" % save_path

    """ Close session """
    print('Finished')
    sess.close()

if __name__ == '__main__':
    main()
EDIT:
I retrained the network for 50 epochs, with the following changes:
- Weights are initialized with Xavier initialization
- The input images are normalized, i.e. the pixel values are divided by 255
- The mean is subtracted (the mean over the whole dataset, about 114, instead of the per-image mean); a rough sketch of these changes follows this list
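Roughly, the initialization and preprocessing changes look like this (a sketch of what I mean, not my exact code; the helper name xavier_conv_weights and the way the dataset mean is applied are just for illustration):

import numpy as np
import tensorflow as tf

# Xavier (Glorot) initialization for a conv weight of shape
# [kernel_h, kernel_w, in_channels, out_channels], replacing the
# fixed stddev=0.0001 used above.
def xavier_conv_weights(shape, name):
    receptive = shape[0] * shape[1]
    fan_in, fan_out = receptive * shape[2], receptive * shape[3]
    stddev = np.sqrt(2.0 / (fan_in + fan_out))
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev), name=name)

W_conv1 = xavier_conv_weights([11, 11, 3, 64], 'W_conv1')

# Preprocessing: divide by 255 and subtract the dataset mean (about 114)
# instead of the per-image mean, roughly:
# my_image = my_image / 255 - 114.0 / 255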
With these changes the network behaves differently at first, but it quickly degenerates again. At step 1 I get:
Step 1, Loss 1.37815, Accuracy 0.4
y_conv (before softmax):
[[ 0.30913264 0. 1.20176554 0. 0. ]
[ 0. 0. 1.23200822 0. 0. ]
[ 0. 0. 0. 0. 0. ]
[ 0. 0. 1.65852785 0.01910716 0. ]
[ 0. 0. 0.94612855 0. 0.10457891]]
y_prob (after softmax):
[[ 0.1771856 0.130069 0.43260741 0.130069 0.130069 ]
[ 0.13462381 0.13462381 0.46150482 0.13462381 0.13462381]
[ 0.2 0.2 0.2 0.2 0.2 ]
[ 0.1078648 0.1078648 0.56646001 0.1099456 0.1078648 ]
[ 0.14956713 0.14956713 0.38524282 0.14956713 0.16605586]]
A few steps later:
Step 39, Loss 1.60944, Accuracy 0.2
y_conv (before softmax):
[[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]
[ 0. 0. 0. 0. 0.]]
y_prob (after softmax):
[[ 0.2 0.2 0.2 0.2 0.2]
[ 0.2 0.2 0.2 0.2 0.2]
[ 0.2 0.2 0.2 0.2 0.2]
[ 0.2 0.2 0.2 0.2 0.2]
[ 0.2 0.2 0.2 0.2 0.2]]
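For reference, a quick numpy check (separate from the training code) confirms that all-zero logits give exactly this uniform output and the 1.60944 loss:

import numpy as np

# Softmax of an all-zero logit vector and the corresponding cross-entropy.
logits = np.zeros(5)
probs = np.exp(logits) / np.exp(logits).sum()
print(probs)                 # [0.2 0.2 0.2 0.2 0.2]
print(-np.log(probs[0]))     # 1.6094..., the loss reported at step 39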
So y_conv collapses to all zeros after a few steps, and the predictions become uniform again. I do not understand why this happens or how to debug it further; any help would be appreciated.