Rprop implementation

I am trying to implement Rprop using my old backprop code as a basis. I am working on a perceptron with one hidden layer. The Rprop algorithm is quite simple, but there are parts of it I do not fully understand. This is my code:

for (j = 1; j <= nnh; j++)
{
    network.input2[j] = network.w12[0][j];
    for (i = 1; i <= nni; i++)
        network.input2[j] += network.input[i] * network.w12[i][j];

    network.output2[j] = (float)(1.0 / (1.0 + Math.Exp(-beta * network.input2[j]))); // sigmoid activation
}

for (k = 1; k <= nno; k++)
{
    network.input3[k] = network.w23[0][k];
    for (j = 1; j <= nnh; j++)
        network.input3[k] += network.output2[j] * network.w23[j][k];

    network.output[k] = (float)(1.0 / (1.0 + Math.Exp(-beta * network.input3[k]))); // sigmoid activation

    error += (float)(0.5 * (t[k - 1] - network.output[k]) * (t[k - 1] - network.output[k]));
    derivativeO[k] = (float)(t[k - 1] - network.output[k]) * network.output[k] * (1 - network.output[k]);
}

for (j = 1; j <= nnh; j++)
{
    saw[j] = 0;
    for (k = 1; k <= nno; k++)
        saw[j] += derivativeO[k] * network.output2[j];

    derivativeH[j] = saw[j] * network.output2[j] * (1 - network.output2[j]);
}

for (j = 1; j <= nnh; j++)//number of neurons in hidden layer
{
    for (i = 1; i <= nni; i++)//number of inputs
    {
        network.gradientH[i][j] = network.input[i] * derivativeH[j];

        if (network.gradientH[i][j] * network.gradientHPrev[i][j] > 0)
        {
            network.deltaH[i][j] = Math.Min(network.deltaH[i][j] * npos, dmax);
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] < 0)
        {
            network.deltaH[i][j] = Math.Max(network.deltaH[i][j] * nneg, dmin);
            network.gradientHPrev[i][j] = 0;
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] == 0)
        {
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
    }
}

for (k = 1; k <= nno; k++)//number of outputs
{
    for (j = 1; j <= nnh; j++)//number of neurons in hidden layer
    {
        network.gradientO[j][k] = network.output2[j] * derivativeO[k];

        if (network.gradientOPrev[j][k] * network.gradientO[j][k] > 0)
        {
            network.deltaO[j][k] = Math.Min(network.deltaO[j][k] * npos, dmax);
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] < 0)
        {
            network.deltaO[j][k] = Math.Max(network.deltaO[j][k] * nneg, dmin);
            network.gradientOPrev[j][k] = 0;
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] == 0)
        {
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
    }
}

The first three for loops are the same ones I used in backprop, and that part of the code works fine. The problem occurs during the weight updates. I am not sure right now whether I am calculating the partial derivatives correctly. The network sometimes converges, and sometimes it just behaves randomly. I have checked everything I can think of. Any suggestions?
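
For comparison, this is how I understand the plain Rprop (Rprop-) update rule for a single weight, based on the original description by Riedmiller and Braun. Here grad is meant to be the partial derivative of the error with respect to the weight, and all names (RpropUpdate, grad, gradPrev, delta, npos, nneg, dmin, dmax) are placeholders rather than my actual fields:

// Sketch of the plain Rprop- update for one weight (assumes using System;).
// grad is dE/dw; gradPrev and delta are carried over between epochs.
static void RpropUpdate(ref float w, ref float delta, ref float gradPrev, float grad,
                        float npos, float nneg, float dmin, float dmax)
{
    float change = grad * gradPrev;

    if (change > 0)
    {
        // same sign as on the previous step: grow the step size and step against the gradient
        delta = Math.Min(delta * npos, dmax);
        w -= Math.Sign(grad) * delta;
        gradPrev = grad;
    }
    else if (change < 0)
    {
        // sign flip: the last step was too big, shrink the step size and skip this update
        delta = Math.Max(delta * nneg, dmin);
        gradPrev = 0;
    }
    else
    {
        // previous gradient was zero (first step, or right after a sign flip)
        w -= Math.Sign(grad) * delta;
        gradPrev = grad;
    }
}

One thing I am unsure about: my derivativeO and derivativeH are built from (t - output), so I think gradientH and gradientO actually correspond to the negative of dE/dw, and I do not know whether the minus sign in -Math.Sign(...) is still correct in that case.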

The loops start at 1 because the first elements (index 0) of the weight matrices hold the bias (offset) values, carried over from the backprop implementation. This is the previous backprop weight-update code, which works well; maybe it will make some things clearer:

for (j = 1; j <= nnh; j++)
{
    network.w12d[0][j] = learningRate * derivativeH[j] + momentum * network.w12d[0][j];
    network.w12[0][j] += network.w12d[0][j];
    for (i = 1; i <= nni; i++)
    {
        network.w12d[i][j] = learningRate * network.input[i] * derivativeH[j] + momentum * network.w12d[i][j];
        network.w12[i][j] += network.w12d[i][j];
    }
}
for (k = 1; k <= nno; k++)
{
    network.w23d[0][k] = learningRate * derivativeO[k] + momentum * network.w23d[0][k];
    network.w23[0][k] += network.w23d[0][k];
    for (j = 1; j <= nnh; j++)
    {
        network.w23d[j][k] = learningRate * network.output2[j] * derivativeO[k] + momentum * network.w23d[j][k];
        network.w23[j][k] += network.w23d[j][k];
    }
}
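
In case it matters, Rprop also needs the step sizes and previous gradients initialized before the first epoch. This is a minimal sketch of that initialization, assuming the delta and previous-gradient arrays are sized like the weight matrices; delta0 = 0.1f is just an arbitrary starting step size, not a value taken from my code:

const float delta0 = 0.1f; // assumed starting step size, a common default for Rprop

for (j = 1; j <= nnh; j++)
{
    for (i = 1; i <= nni; i++)
    {
        network.deltaH[i][j] = delta0;   // initial step size for input->hidden weights
        network.gradientHPrev[i][j] = 0; // no previous gradient before the first epoch
    }
}
for (k = 1; k <= nno; k++)
{
    for (j = 1; j <= nnh; j++)
    {
        network.deltaO[j][k] = delta0;   // initial step size for hidden->output weights
        network.gradientOPrev[j][k] = 0;
    }
}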