Using the Pima Indians diabetes dataset, I am trying to build an accurate model with Keras. I wrote the following code:
import numpy
import matplotlib.pyplot as plt
from keras import callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout

seed = 7
numpy.random.seed(seed)

# TensorBoard callback; logs go to ./logs ('/.logs' in my first attempt pointed at the filesystem root)
tb = callbacks.TensorBoard(log_dir='./logs', histogram_freq=10, batch_size=32,
                           write_graph=True, write_grads=True, write_images=False,
                           embeddings_freq=0, embeddings_layer_names=None,
                           embeddings_metadata=None)
# load the dataset: columns 0-7 are the features, column 8 is the class label
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
X = dataset[:, 0:8]
Y = dataset[:, 8]
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu', name='first_input'))
model.add(Dense(500, activation='tanh', name='first_hidden'))
model.add(Dropout(0.5, name='dropout_1'))
model.add(Dense(8, activation='relu', name='second_hidden'))
model.add(Dense(1, activation='sigmoid', name='output_layer'))
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
history = model.fit(X, Y, validation_split=0.33, epochs=1000, batch_size=10, verbose=0, callbacks=[tb])
print(history.history.keys())
# note: newer Keras versions name these keys 'accuracy'/'val_accuracy' instead of 'acc'/'val_acc'
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
After several attempts, I added a dropout layer to avoid overfitting, but with no luck. The following graph shows that the training and validation losses diverge at one point.

What else can I do to optimize this network?
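One idea on my list that I have not tried yet is early stopping, so that training halts once the validation loss stops improving. A minimal sketch of what I mean (the patience value of 50 is just a guess):

from keras.callbacks import EarlyStopping

# stop once val_loss has not improved for 50 consecutive epochs (50 is a guess)
es = EarlyStopping(monitor='val_loss', patience=50, verbose=1)
history = model.fit(X, Y, validation_split=0.33, epochs=1000, batch_size=10,
                    verbose=0, callbacks=[tb, es])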
UPDATE:
Based on the comments I received, I changed the code as follows:
from keras import regularizers  # import needed for the regularizers below

model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform',
                kernel_regularizer=regularizers.l2(0.01),
                activity_regularizer=regularizers.l1(0.01),
                activation='relu', name='first_input'))  # added regularizers
model.add(Dense(8, activation='relu', name='first_hidden'))  # reduced to 8 neurons
model.add(Dropout(0.5, name='dropout_1'))
model.add(Dense(5, activation='relu', name='second_hidden'))
model.add(Dense(1, activation='sigmoid', name='output_layer'))
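For context on the regularizers: kernel_regularizer penalizes the layer's weights while activity_regularizer penalizes its output, and I picked 0.01 for both more or less arbitrarily.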
Here are the graphs for 500 epochs:

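One more change I am considering is standardizing the inputs before training, since the eight features are on very different scales (e.g. glucose level vs. pedigree function). A minimal sketch using scikit-learn, assuming it is available:

from sklearn.preprocessing import StandardScaler

# rescale each feature to zero mean and unit variance before fitting
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
history = model.fit(X_scaled, Y, validation_split=0.33, epochs=500, batch_size=10,
                    verbose=0, callbacks=[tb])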