I am trying to implement Rprop using my old backprop code as a basis. I am working on a perceptron with one hidden layer. The Rprop algorithm itself is quite simple, but I am not sure I have understood every detail of it; I have written out the update rule I am following right after the code below. This is my code:
// Forward pass: hidden layer (w12[0][j] is the bias weight).
for (j = 1; j <= nnh; j++)
{
    network.input2[j] = network.w12[0][j];
    for (i = 1; i <= nni; i++)
        network.input2[j] += network.input[i] * network.w12[i][j];
    network.output2[j] = (float)(1.0 / (1.0 + Math.Pow(Math.E, beta * -network.input2[j])));
}

// Forward pass: output layer, squared error and output deltas.
for (k = 1; k <= nno; k++)
{
    network.input3[k] = network.w23[0][k];
    for (j = 1; j <= nnh; j++)
        network.input3[k] += network.output2[j] * network.w23[j][k];
    network.output[k] = (float)(1.0 / (1.0 + Math.Pow(Math.E, beta * -network.input3[k])));
    error += (float)(0.5 * (t[k - 1] - network.output[k]) * (t[k - 1] - network.output[k]));
    derivativeO[k] = (float)(t[k - 1] - network.output[k]) * network.output[k] * (1 - network.output[k]);
}

// Hidden deltas, computed from the output deltas.
for (j = 1; j <= nnh; j++)
{
    saw[j] = 0;
    for (k = 1; k <= nno; k++)
        saw[j] += derivativeO[k] * network.output2[j];
    derivativeH[j] = saw[j] * network.output2[j] * (1 - network.output2[j]);
}

// Rprop update for the input-to-hidden weights w12.
for (j = 1; j <= nnh; j++)
{
    for (i = 1; i <= nni; i++)
    {
        network.gradientH[i][j] = network.input[i] * derivativeH[j];

        if (network.gradientH[i][j] * network.gradientHPrev[i][j] > 0)
        {
            // Same sign as the previous step: increase the step size and update the weight.
            network.deltaH[i][j] = Math.Min(network.deltaH[i][j] * npos, dmax);
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] < 0)
        {
            // Sign change: decrease the step size and skip the weight update this time.
            network.deltaH[i][j] = Math.Max(network.deltaH[i][j] * nneg, dmin);
            network.gradientHPrev[i][j] = 0;
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] == 0)
        {
            // Product is zero (first epoch or right after a sign change): plain update with the current step size.
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
    }
}

// Rprop update for the hidden-to-output weights w23.
for (k = 1; k <= nno; k++)
{
    for (j = 1; j <= nnh; j++)
    {
        network.gradientO[j][k] = network.output2[j] * derivativeO[k];

        if (network.gradientOPrev[j][k] * network.gradientO[j][k] > 0)
        {
            // Same sign as the previous step: increase the step size and update the weight.
            network.deltaO[j][k] = Math.Min(network.deltaO[j][k] * npos, dmax);
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] < 0)
        {
            // Sign change: decrease the step size and skip the weight update this time.
            network.deltaO[j][k] = Math.Max(network.deltaO[j][k] * nneg, dmin);
            network.gradientOPrev[j][k] = 0;
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] == 0)
        {
            // Product is zero (first epoch or right after a sign change): plain update with the current step size.
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
    }
}
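For reference, this is the update rule I am trying to follow (my own summary of Rprop without weight backtracking; the helper below is only a sketch, the names npos, nneg, dmin, dmax are the same parameters as in my code, everything else is made up for illustration):

// One Rprop step for a single weight (sketch only, not my actual code).
// grad is the current derivative of the error with respect to this weight,
// gradPrev the one from the previous epoch, delta the per-weight step size.
static void RpropStep(ref float w, ref float delta, ref float gradPrev, float grad,
                      float npos, float nneg, float dmin, float dmax)
{
    if (grad * gradPrev > 0)
    {
        // Same sign as before: grow the step size and move against the gradient.
        delta = Math.Min(delta * npos, dmax);
        w -= Math.Sign(grad) * delta;
        gradPrev = grad;
    }
    else if (grad * gradPrev < 0)
    {
        // Sign changed: the previous step was too large, shrink it and skip this update.
        delta = Math.Max(delta * nneg, dmin);
        gradPrev = 0;
    }
    else
    {
        // gradPrev == 0 (first epoch or right after a sign change): plain step.
        w -= Math.Sign(grad) * delta;
        gradPrev = grad;
    }
}

In my code above, gradientH/gradientO play the role of grad, gradientHPrev/gradientOPrev the role of gradPrev, and deltaH/deltaO the role of delta.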
The first three for loops (the forward pass and the delta computations) are the same ones I used in backprop, and that part of the code works fine. The problem occurs during the weight updates. Right now I am not sure whether I am calculating the partial derivatives correctly. The network sometimes converges, and sometimes it just behaves randomly. As far as I can tell, everything else is in place. I have written out the derivatives I think I should be computing below. Any suggestions?
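To spell out what I mean by the partial derivatives: for the sigmoid activations and the squared error above (and ignoring the constant factor beta, just like my backprop code does), I believe the gradients should be

E = 0.5 * Σ_k (t_k - output_k)^2
∂E/∂w23[j][k] = -(t_k - output_k) * output_k * (1 - output_k) * output2_j
∂E/∂w12[i][j] = -( Σ_k (t_k - output_k) * output_k * (1 - output_k) * w23[j][k] ) * output2_j * (1 - output2_j) * input_i

gradientO[j][k] and gradientH[i][j] in my code are meant to be these (or possibly their negatives, I am not sure which sign convention the Rprop update expects).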
The for loops start at 1 because the first elements (index 0) of the weight matrices store the bias (offset) values, a convention carried over from the backprop implementation. This is the previous backprop weight-update implementation, which works great; maybe it will make some things clearer:
// Backprop update for the input-to-hidden weights (index 0 holds the bias).
for (j = 1; j <= nnh; j++)
{
    network.w12d[0][j] = learningRate * derivativeH[j] + momentum * network.w12d[0][j];
    network.w12[0][j] += network.w12d[0][j];
    for (i = 1; i <= nni; i++)
    {
        network.w12d[i][j] = learningRate * network.input[i] * derivativeH[j] + momentum * network.w12d[i][j];
        network.w12[i][j] += network.w12d[i][j];
    }
}

// Backprop update for the hidden-to-output weights (index 0 holds the bias).
for (k = 1; k <= nno; k++)
{
    network.w23d[0][k] = learningRate * derivativeO[k] + momentum * network.w23d[0][k];
    network.w23[0][k] += network.w23d[0][k];
    for (j = 1; j <= nnh; j++)
    {
        network.w23d[j][k] = learningRate * network.output2[j] * derivativeO[k] + momentum * network.w23d[j][k];
        network.w23[j][k] += network.w23d[j][k];
    }
}
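One thing not shown above: the Rprop state has to be initialized before the first epoch. This is only a sketch of the initialization I am assuming (not my actual code); the values 1.2, 0.5, 50, 1e-6 and the initial step size 0.1 are the defaults recommended in the Rprop paper:

// Sketch of the Rprop initialization I am assuming (not my actual code).
npos = 1.2f;        // step-size increase factor
nneg = 0.5f;        // step-size decrease factor
dmax = 50.0f;       // upper bound on the step size
dmin = 0.000001f;   // lower bound on the step size
for (j = 1; j <= nnh; j++)
    for (i = 1; i <= nni; i++)
    {
        network.deltaH[i][j] = 0.1f;      // initial step size delta0
        network.gradientHPrev[i][j] = 0;  // no previous gradient yet
    }
for (k = 1; k <= nno; k++)
    for (j = 1; j <= nnh; j++)
    {
        network.deltaO[j][k] = 0.1f;
        network.gradientOPrev[j][k] = 0;
    }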