I am trying to implement a really simple neural network with backpropagation and to train it on the logical AND operator. But the predictions it produces are completely off. :(
import Foundation

public class ActivationFunction {
    class func sigmoid(x: Float) -> Float {
        return 1.0 / (1.0 + exp(-x))
    }

    // The argument is already sigmoid(x), so the derivative is x * (1 - x).
    class func dSigmoid(x: Float) -> Float {
        return x * (1 - x)
    }
}

public class NeuralNetConstants {
    public static let learningRate: Float = 0.3
    public static let momentum: Float = 0.6
    public static let iterations: Int = 100000
}

public class Layer {
    private var output: [Float]
    private var input: [Float]
    private var weights: [Float]
    private var dWeights: [Float]

    init(inputSize: Int, outputSize: Int) {
        self.output = [Float](repeating: 0, count: outputSize)
        self.input = [Float](repeating: 0, count: inputSize + 1) // + 1 for the bias
        self.weights = [Float](repeating: (-2.0...2.0).random(), count: (1 + inputSize) * outputSize)
        self.dWeights = [Float](repeating: 0, count: weights.count)
    }

    // Forward pass: weighted sum plus sigmoid for every output neuron.
    public func run(inputArray: [Float]) -> [Float] {
        input = inputArray
        input[input.count - 1] = 1 // meant to set the bias slot
        var offSet = 0
        for i in 0..<output.count {
            for j in 0..<input.count {
                output[i] += weights[offSet + j] * input[j]
            }
            output[i] = ActivationFunction.sigmoid(x: output[i])
            offSet += input.count
        }
        return output
    }

    // Backward pass: updates this layer's weights and returns the error
    // to propagate to the previous layer.
    public func train(error: [Float], learningRate: Float, momentum: Float) -> [Float] {
        var offset = 0
        var nextError = [Float](repeating: 0, count: input.count)
        for i in 0..<output.count {
            let delta = error[i] * ActivationFunction.dSigmoid(x: output[i])
            for j in 0..<input.count {
                let weightIndex = offset + j
                nextError[j] = nextError[j] + weights[weightIndex] * delta
                let dw = input[j] * delta * learningRate
                weights[weightIndex] += dWeights[weightIndex] * momentum + dw
                dWeights[weightIndex] = dw
            }
            offset += input.count
        }
        return nextError
    }
}

public class BackpropNeuralNetwork {
    private var layers: [Layer] = []

    public init(inputSize: Int, hiddenSize: Int, outputSize: Int) {
        self.layers.append(Layer(inputSize: inputSize, outputSize: hiddenSize))
        self.layers.append(Layer(inputSize: hiddenSize, outputSize: outputSize))
    }

    public func getLayer(index: Int) -> Layer {
        return layers[index]
    }

    public func run(input: [Float]) -> [Float] {
        var activations = input
        for i in 0..<layers.count {
            activations = layers[i].run(inputArray: activations)
        }
        return activations
    }

    public func train(input: [Float], targetOutput: [Float], learningRate: Float, momentum: Float) {
        let calculatedOutput = run(input: input)
        var error = [Float](repeating: 0, count: calculatedOutput.count)
        for i in 0..<error.count {
            error[i] = targetOutput[i] - calculatedOutput[i]
        }
        // Propagate the output error backwards through the layers.
        for i in (0...layers.count - 1).reversed() {
            error = layers[i].train(error: error, learningRate: learningRate, momentum: momentum)
        }
    }
}

extension ClosedRange where Bound: FloatingPoint {
    public func random() -> Bound {
        let range = self.upperBound - self.lowerBound
        let randomValue = (Bound(arc4random_uniform(UINT32_MAX)) / Bound(UINT32_MAX)) * range + self.lowerBound
        return randomValue
    }
}
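One thing I am not sure about is my weight initialization. Swift's [Float](repeating:count:) evaluates the repeated value only once, so if I read my init correctly, every weight starts out with the same random number, whereas the Java version below draws a fresh random number per weight in initWeights(). A per-weight version could look like this (my sketch, untested; the helper name is mine, and it relies on the ClosedRange.random() extension from above):

// Hypothetical helper: one fresh random draw per weight,
// like the loop in Java's initWeights().
func makeRandomWeights(inputSize: Int, outputSize: Int) -> [Float] {
    return (0 ..< (1 + inputSize) * outputSize).map { _ in
        (Float(-2.0)...Float(2.0)).random()
    }
}

In Layer.init that would replace the weights line with self.weights = makeRandomWeights(inputSize: inputSize, outputSize: outputSize).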
This is my training data. I just want my network to learn the simple logical AND operator.
My input:
let traningData: [[Float]] = [
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
]

let traningResults: [[Float]] = [
    [0],
    [0],
    [0],
    [1]
]

let backProb = BackpropNeuralNetwork(inputSize: 2, hiddenSize: 3, outputSize: 1)

for iterations in 0..<NeuralNetConstants.iterations {
    for i in 0..<traningResults.count {
        backProb.train(input: traningData[i],
                       targetOutput: traningResults[i],
                       learningRate: NeuralNetConstants.learningRate,
                       momentum: NeuralNetConstants.momentum)
    }
    for i in 0..<traningResults.count {
        let t = traningData[i]
        print("\(t[0]), \(t[1]) -- \(backProb.run(input: t)[0])")
    }
}
This is my entire neural network code. The code is not very elegant, but I think it is important to understand the theory of neural networks first; making the code nicer can come later.
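For reference, this is the update rule that, as I understand the theory, both versions are meant to implement. Since output[i] already holds sigmoid of the weighted sum, the derivative is written directly in terms of the output $o_i$:

$$\delta_i = e_i \, o_i (1 - o_i)$$
$$w_{ij} \mathrel{+}= \mu \, \Delta w_{ij}^{\text{old}} + \eta \, \delta_i \, x_j, \qquad \Delta w_{ij} = \eta \, \delta_i \, x_j$$
$$e_j^{\text{prev}} = \sum_i w_{ij} \, \delta_i$$

Here $\eta = 0.3$ is the learning rate, $\mu = 0.6$ the momentum, $x_j$ the layer inputs (including the constant bias input 1), and at the output layer $e_i$ is target minus output.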
The problem is that my results are completely wrong. This is what I get:
0.0, 0.0 -- 0.246135
0.0, 1.0 -- 0.251307
1.0, 0.0 -- 0.24325
1.0, 1.0 -- 0.240923

All four outputs sit near 0.25, which is exactly the mean of my four target values, as if the network were ignoring its inputs entirely.
This is what I want to get (roughly):

0.0, 0.0 -- 0.0
0.0, 1.0 -- 0.0
1.0, 0.0 -- 0.0
1.0, 1.0 -- 1.0
For comparison, the Java implementation works fine:
import java.util.Arrays;
import java.util.Random;

public class ActivationFunction {
    public static float sigmoid(float x) {
        return (float) (1 / (1 + Math.exp(-x)));
    }

    public static float dSigmoid(float x) {
        return x * (1 - x); // because the output is already sigmoid(x), we don't apply it twice
    }
}

public class NeuralNetConstants {
    private NeuralNetConstants() {
    }

    public static final float LEARNING_RATE = 0.3f;
    public static final float MOMENTUM = 0.6f;
    public static final int ITERATIONS = 100000;
}

public class Layer {
    private float[] output;
    private float[] input;
    private float[] weights;
    private float[] dWeights;
    private Random random;

    public Layer(int inputSize, int outputSize) {
        output = new float[outputSize];
        input = new float[inputSize + 1];
        weights = new float[(1 + inputSize) * outputSize];
        dWeights = new float[weights.length];
        this.random = new Random();
        initWeights();
    }

    public void initWeights() {
        for (int i = 0; i < weights.length; i++) {
            weights[i] = (random.nextFloat() - 0.5f) * 4f;
        }
    }

    public float[] run(float[] inputArray) {
        System.arraycopy(inputArray, 0, input, 0, inputArray.length);
        input[input.length - 1] = 1; // bias
        int offset = 0;
        for (int i = 0; i < output.length; i++) {
            for (int j = 0; j < input.length; j++) {
                output[i] += weights[offset + j] * input[j];
            }
            output[i] = ActivationFunction.sigmoid(output[i]);
            offset += input.length;
        }
        return Arrays.copyOf(output, output.length);
    }

    public float[] train(float[] error, float learningRate, float momentum) {
        int offset = 0;
        float[] nextError = new float[input.length];
        for (int i = 0; i < output.length; i++) {
            float delta = error[i] * ActivationFunction.dSigmoid(output[i]);
            for (int j = 0; j < input.length; j++) {
                int previousWeightIndex = offset + j;
                nextError[j] = nextError[j] + weights[previousWeightIndex] * delta;
                float dw = input[j] * delta * learningRate;
                weights[previousWeightIndex] += dWeights[previousWeightIndex] * momentum + dw;
                dWeights[previousWeightIndex] = dw;
            }
            offset += input.length;
        }
        return nextError;
    }
}

public class BackpropNeuralNetwork {
    private Layer[] layers;

    public BackpropNeuralNetwork(int inputSize, int hiddenSize, int outputSize) {
        layers = new Layer[2];
        layers[0] = new Layer(inputSize, hiddenSize);
        layers[1] = new Layer(hiddenSize, outputSize);
    }

    public Layer getLayer(int index) {
        return layers[index];
    }

    public float[] run(float[] input) {
        float[] inputActivation = input;
        for (int i = 0; i < layers.length; i++) {
            inputActivation = layers[i].run(inputActivation);
        }
        return inputActivation;
    }

    public void train(float[] input, float[] targetOutput, float learningRate, float momentum) {
        float[] calculatedOutput = run(input);
        float[] error = new float[calculatedOutput.length];
        for (int i = 0; i < error.length; i++) {
            error[i] = targetOutput[i] - calculatedOutput[i];
        }
        for (int i = layers.length - 1; i >= 0; i--) {
            error = layers[i].train(error, learningRate, momentum);
        }
    }
}

public class NeuralNetwork {

    public static void main(String[] args) {
        float[][] trainingData = new float[][] {
            new float[] { 0, 0 },
            new float[] { 0, 1 },
            new float[] { 1, 0 },
            new float[] { 1, 1 }
        };

        float[][] trainingResults = new float[][] {
            new float[] { 0 },
            new float[] { 0 },
            new float[] { 0 },
            new float[] { 1 }
        };

        BackpropNeuralNetwork backpropagationNeuralNetworks = new BackpropNeuralNetwork(2, 3, 1);

        for (int iterations = 0; iterations < NeuralNetConstants.ITERATIONS; iterations++) {
            for (int i = 0; i < trainingResults.length; i++) {
                backpropagationNeuralNetworks.train(trainingData[i], trainingResults[i],
                        NeuralNetConstants.LEARNING_RATE, NeuralNetConstants.MOMENTUM);
            }
            System.out.println();
            for (int i = 0; i < trainingResults.length; i++) {
                float[] t = trainingData[i];
                System.out.printf("%d epoch\n", iterations + 1);
                System.out.printf("%.1f, %.1f --> %.3f\n", t[0], t[1],
                        backpropagationNeuralNetworks.run(t)[0]);
            }
        }
    }
}
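One difference I can already see between the two run() methods: Java copies the input into the front of the inputSize + 1 buffer with System.arraycopy, so the last slot stays reserved for the bias. My Swift line input = inputArray replaces the whole array, so input.count shrinks to inputSize and input[input.count - 1] = 1 overwrites my last real input instead of writing a bias. A Swift copy that mirrors the Java behaviour might look like this (sketch, untested; the helper name is mine):

// Hypothetical helper: copy the way Java's System.arraycopy call does,
// filling the first inputArray.count slots and keeping the last slot
// of `input` for the bias term, so `input` keeps its count of inputSize + 1.
func copyKeepingBias(_ inputArray: [Float], into input: inout [Float]) {
    for j in 0 ..< inputArray.count {
        input[j] = inputArray[j]
    }
    input[input.count - 1] = 1 // bias
}

Could these two differences (the weight initialization above and this input copy) be what breaks the Swift version?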