Software Programming

Kunuk Nykjaer

Neural network backpropagation with Java

with 39 comments


Neural network with backpropagation training xor example.
This framework supports only one hidden layer and the activation function is sigmoid.

In this example there are two input neurons, four neurons in the hidden layer and one neuron in the output layer.
The weights are initialized randomly.

NeuralNetwork.java

import java.text.*;
import java.util.*;

/**
 * Feed-forward neural network with exactly one hidden layer, trained with
 * online (per-pattern) backpropagation plus a momentum term. The activation
 * function is the logistic sigmoid. Demonstrated here on the XOR problem.
 */
public class NeuralNetwork {
	static {
		// Force '.' as the decimal separator so DecimalFormat output is stable.
		Locale.setDefault(Locale.ENGLISH);
	}

	// When true, skip training setup: load the hard-coded weights from
	// trainedWeights() via updateAllWeights() instead of random init.
	final boolean isTrained = false;
	final DecimalFormat df;
	final Random rand = new Random();
	final ArrayList<Neuron> inputLayer = new ArrayList<Neuron>();
	final ArrayList<Neuron> hiddenLayer = new ArrayList<Neuron>();
	final ArrayList<Neuron> outputLayer = new ArrayList<Neuron>();
	// Single shared bias neuron; every hidden/output neuron connects to it.
	// Created by field initialization, so it receives the first neuron id.
	final Neuron bias = new Neuron();
	final int[] layers; // layer sizes: { input, hidden, output }
	final int randomWeightMultiplier = 1; // scale factor for initial random weights

	// Tiny value used to push out-of-range expected outputs strictly inside ]0;1[.
	final double epsilon = 0.00000000001;

	final double learningRate = 0.9f; // step size for weight updates
	final double momentum = 0.7f; // fraction of the previous delta re-applied

	// Inputs for xor problem
	final double inputs[][] = { { 1, 1 }, { 1, 0 }, { 0, 1 }, { 0, 0 } };

	// Corresponding outputs, xor training data
	final double expectedOutputs[][] = { { 0 }, { 1 }, { 1 }, { 0 } };
	double resultOutputs[][] = { { -1 }, { -1 }, { -1 }, { -1 } }; // dummy init
	double output[];

	// for weight update all; key format "N<neuronId>_C<connectionId>", see weightKey()
	final HashMap<String, Double> weightUpdate = new HashMap<String, Double>();

	/**
	 * Builds a 2-4-1 network and trains it on XOR until the summed squared
	 * error of an epoch drops below 0.001 or 50000 epochs have passed.
	 */
	public static void main(String[] args) {
		NeuralNetwork nn = new NeuralNetwork(2, 4, 1);
		int maxRuns = 50000;
		double minErrorCondition = 0.001;
		nn.run(maxRuns, minErrorCondition);
	}

	/**
	 * Creates all layers, wires the connections and initializes the weights.
	 *
	 * @param input  number of neurons in the input layer
	 * @param hidden number of neurons in the hidden layer
	 * @param output number of neurons in the output layer
	 */
	public NeuralNetwork(int input, int hidden, int output) {
		this.layers = new int[] { input, hidden, output };
		df = new DecimalFormat("#.0#");

		/**
		 * Create all neurons and connections Connections are created in the
		 * neuron class
		 */
		for (int i = 0; i < layers.length; i++) {
			if (i == 0) { // input layer: plain neurons, no incoming connections
				for (int j = 0; j < layers[i]; j++) {
					Neuron neuron = new Neuron();
					inputLayer.add(neuron);
				}
			} else if (i == 1) { // hidden layer: fully connected to input layer + bias
				for (int j = 0; j < layers[i]; j++) {
					Neuron neuron = new Neuron();
					neuron.addInConnectionsS(inputLayer);
					neuron.addBiasConnection(bias);
					hiddenLayer.add(neuron);
				}
			}

			else if (i == 2) { // output layer: fully connected to hidden layer + bias
				for (int j = 0; j < layers[i]; j++) {
					Neuron neuron = new Neuron();
					neuron.addInConnectionsS(hiddenLayer);
					neuron.addBiasConnection(bias);
					outputLayer.add(neuron);
				}
			} else {
				System.out.println("!Error NeuralNetwork init");
			}
		}

		// initialize random weights
		for (Neuron neuron : hiddenLayer) {
			ArrayList<Connection> connections = neuron.getAllInConnections();
			for (Connection conn : connections) {
				double newWeight = getRandom();
				conn.setWeight(newWeight);
			}
		}
		for (Neuron neuron : outputLayer) {
			ArrayList<Connection> connections = neuron.getAllInConnections();
			for (Connection conn : connections) {
				double newWeight = getRandom();
				conn.setWeight(newWeight);
			}
		}

		// reset id counters so a subsequently built network starts ids at 0
		Neuron.counter = 0;
		Connection.counter = 0;

		if (isTrained) {
			trainedWeights();
			updateAllWeights();
		}
	}

	// random weight in [-1;1[, scaled by randomWeightMultiplier
	double getRandom() {
		return randomWeightMultiplier * (rand.nextDouble() * 2 - 1); // [-1;1[
	}

	/**
	 * Copies one training pattern onto the input layer neurons.
	 *
	 * @param inputs
	 *            There is equally many neurons in the input layer as there are
	 *            in input variables
	 */
	public void setInput(double inputs[]) {
		for (int i = 0; i < inputLayer.size(); i++) {
			inputLayer.get(i).setOutput(inputs[i]);
		}
	}

	/**
	 * @return the current activation of each output neuron, in layer order
	 */
	public double[] getOutput() {
		double[] outputs = new double[outputLayer.size()];
		for (int i = 0; i < outputLayer.size(); i++)
			outputs[i] = outputLayer.get(i).getOutput();
		return outputs;
	}

	/**
	 * Calculate the output of the neural network based on the input The forward
	 * operation. Hidden neurons must fire before output neurons.
	 */
	public void activate() {
		for (Neuron n : hiddenLayer)
			n.calculateOutput();
		for (Neuron n : outputLayer)
			n.calculateOutput();
	}

	/**
	 * all output propagate back
	 * 
	 * @param expectedOutput
	 *            first calculate the partial derivative of the error with
	 *            respect to each of the weight leading into the output neurons
	 *            bias is also updated here
	 */
	public void applyBackpropagation(double expectedOutput[]) {

		// error check, normalize value ]0;1[
		// NOTE(review): this clamps the caller's array in place; harmless for
		// the 0/1 XOR targets here, but it is a side effect on the training data.
		for (int i = 0; i < expectedOutput.length; i++) {
			double d = expectedOutput[i];
			if (d < 0 || d > 1) {
				if (d < 0)
					expectedOutput[i] = 0 + epsilon;
				else
					expectedOutput[i] = 1 - epsilon;
			}
		}

		// update weights for the output layer:
		// dE/dw = -ak*(1-ak)*(tk-ak)*ai  (sigmoid derivative times output error)
		int i = 0;
		for (Neuron n : outputLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				double ak = n.getOutput(); // output neuron activation
				double ai = con.leftNeuron.getOutput(); // upstream activation
				double desiredOutput = expectedOutput[i];

				double partialDerivative = -ak * (1 - ak) * ai
						* (desiredOutput - ak);
				double deltaWeight = -learningRate * partialDerivative;
				double newWeight = con.getWeight() + deltaWeight;
				con.setDeltaWeight(deltaWeight);
				// momentum: re-apply a fraction of the previous epoch's delta
				con.setWeight(newWeight + momentum * con.getPrevDeltaWeight());
			}
			i++;
		}

		// update weights for the hidden layer
		// (chain rule: error signal is summed over all output neurons this
		// hidden neuron feeds into)
		for (Neuron n : hiddenLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				double aj = n.getOutput(); // hidden neuron activation
				double ai = con.leftNeuron.getOutput(); // upstream activation
				double sumKoutputs = 0;
				int j = 0;
				for (Neuron out_neu : outputLayer) {
					// weight of the hidden->output connection leaving neuron n
					double wjk = out_neu.getConnection(n.id).getWeight();
					double desiredOutput = (double) expectedOutput[j];
					double ak = out_neu.getOutput();
					j++;
					sumKoutputs = sumKoutputs
							+ (-(desiredOutput - ak) * ak * (1 - ak) * wjk);
				}

				double partialDerivative = aj * (1 - aj) * ai * sumKoutputs;
				double deltaWeight = -learningRate * partialDerivative;
				double newWeight = con.getWeight() + deltaWeight;
				con.setDeltaWeight(deltaWeight);
				// momentum term, same as in the output-layer update above
				con.setWeight(newWeight + momentum * con.getPrevDeltaWeight());
			}
		}
	}

	/**
	 * Online training loop: each epoch presents every pattern, runs the
	 * forward pass, accumulates the squared error and backpropagates.
	 *
	 * @param maxSteps maximum number of epochs before giving up
	 * @param minError stop once an epoch's summed squared error drops below this
	 */
	void run(int maxSteps, double minError) {
		int i;
		// Train neural network until minError reached or maxSteps exceeded
		double error = 1;
		for (i = 0; i < maxSteps && error > minError; i++) {
			error = 0;
			for (int p = 0; p < inputs.length; p++) {
				setInput(inputs[p]);

				activate();

				output = getOutput();
				resultOutputs[p] = output;

				// accumulate squared error over all output neurons
				for (int j = 0; j < expectedOutputs[p].length; j++) {
					double err = Math.pow(output[j] - expectedOutputs[p][j], 2);
					error += err;
				}

				// online training: weights are updated after every pattern
				applyBackpropagation(expectedOutputs[p]);
			}
		}

		printResult();
		
		System.out.println("Sum of squared errors = " + error);
		System.out.println("##### EPOCH " + i+"\n");
		if (i == maxSteps) {
			System.out.println("!Error training try again");
		} else {
			printAllWeights();
			printWeightUpdate();
		}
	}
	
	/**
	 * Prints input, expected output and actual output for every pattern.
	 */
	void printResult()
	{
		System.out.println("NN example with xor training");
		for (int p = 0; p < inputs.length; p++) {
			System.out.print("INPUTS: ");
			for (int x = 0; x < layers[0]; x++) {
				System.out.print(inputs[p][x] + " ");
			}

			System.out.print("EXPECTED: ");
			for (int x = 0; x < layers[2]; x++) {
				System.out.print(expectedOutputs[p][x] + " ");
			}

			System.out.print("ACTUAL: ");
			for (int x = 0; x < layers[2]; x++) {
				System.out.print(resultOutputs[p][x] + " ");
			}
			System.out.println();
		}
		System.out.println();
	}

	/**
	 * Builds the lookup key for a (neuron, connection) pair in weightUpdate.
	 */
	String weightKey(int neuronId, int conId) {
		return "N" + neuronId + "_C" + conId;
	}

	/**
	 * Take from hash table and put into all weights
	 * NOTE(review): weightUpdate.get(key) is auto-unboxed to double, so a
	 * missing key throws NullPointerException — keys must match the topology.
	 */
	public void updateAllWeights() {
		// update weights for the output layer
		for (Neuron n : outputLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				String key = weightKey(n.id, con.id);
				double newWeight = weightUpdate.get(key);
				con.setWeight(newWeight);
			}
		}
		// update weights for the hidden layer
		for (Neuron n : hiddenLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				String key = weightKey(n.id, con.id);
				double newWeight = weightUpdate.get(key);
				con.setWeight(newWeight);
			}
		}
	}

	// trained data; neuron ids assume the default 2-4-1 topology
	// (bias gets id 0, inputs 1-2, hidden 3-6, output 7 — TODO confirm
	// against Neuron id assignment if the topology changes)
	void trainedWeights() {
		weightUpdate.clear();
		
		weightUpdate.put(weightKey(3, 0), 1.03);
		weightUpdate.put(weightKey(3, 1), 1.13);
		weightUpdate.put(weightKey(3, 2), -.97);
		weightUpdate.put(weightKey(4, 3), 7.24);
		weightUpdate.put(weightKey(4, 4), -3.71);
		weightUpdate.put(weightKey(4, 5), -.51);
		weightUpdate.put(weightKey(5, 6), -3.28);
		weightUpdate.put(weightKey(5, 7), 7.29);
		weightUpdate.put(weightKey(5, 8), -.05);
		weightUpdate.put(weightKey(6, 9), 5.86);
		weightUpdate.put(weightKey(6, 10), 6.03);
		weightUpdate.put(weightKey(6, 11), .71);
		weightUpdate.put(weightKey(7, 12), 2.19);
		weightUpdate.put(weightKey(7, 13), -8.82);
		weightUpdate.put(weightKey(7, 14), -8.84);
		weightUpdate.put(weightKey(7, 15), 11.81);
		weightUpdate.put(weightKey(7, 16), .44);
	}

	/**
	 * Prints the current weights as ready-to-paste Java statements for
	 * trainedWeights(), so a trained network can be restored later.
	 */
	public void printWeightUpdate() {
		System.out.println("printWeightUpdate, put this i trainedWeights() and set isTrained to true");
		// weights for the hidden layer
		for (Neuron n : hiddenLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				String w = df.format(con.getWeight());
				System.out.println("weightUpdate.put(weightKey(" + n.id + ", "
						+ con.id + "), " + w + ");");
			}
		}
		// weights for the output layer
		for (Neuron n : outputLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				String w = df.format(con.getWeight());
				System.out.println("weightUpdate.put(weightKey(" + n.id + ", "
						+ con.id + "), " + w + ");");
			}
		}
		System.out.println();
	}

	/**
	 * Dumps every weight (neuron id, connection id, value) for inspection.
	 */
	public void printAllWeights() {
		System.out.println("printAllWeights");
		// weights for the hidden layer
		for (Neuron n : hiddenLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				double w = con.getWeight();
				System.out.println("n=" + n.id + " c=" + con.id + " w=" + w);
			}
		}
		// weights for the output layer
		for (Neuron n : outputLayer) {
			ArrayList<Connection> connections = n.getAllInConnections();
			for (Connection con : connections) {
				double w = con.getWeight();
				System.out.println("n=" + n.id + " c=" + con.id + " w=" + w);
			}
		}
		System.out.println();
	}
}

Neuron.java

import java.util.*;

/**
 * A single sigmoid unit. Holds its incoming connections (plus one dedicated
 * bias connection) and caches the most recently computed activation value.
 */
public class Neuron {
	static int counter = 0; // next id to hand out
	final public int id;  // auto increment, starts at 0
	Connection biasConnection;
	final double bias = -1;
	double output;

	ArrayList<Connection> Inconnections = new ArrayList<Connection>();
	HashMap<Integer,Connection> connectionLookup = new HashMap<Integer,Connection>();

	public Neuron() {
		id = counter++;
	}

	/**
	 * Forward pass for this unit: output = sigmoid(sum_i(w_i * a_i) + w0 * bias).
	 * The bias connection also sits in Inconnections, but the shared bias
	 * neuron's output defaults to 0, so only the explicit bias term counts.
	 */
	public void calculateOutput() {
		double weightedSum = 0;
		for (int k = 0; k < Inconnections.size(); k++) {
			Connection inbound = Inconnections.get(k);
			double inboundWeight = inbound.getWeight();
			double upstreamActivation = inbound.getFromNeuron().getOutput();
			weightedSum += inboundWeight * upstreamActivation;
		}
		weightedSum += biasConnection.getWeight() * bias;

		output = g(weightedSum);
	}

	/** Activation function; delegates to the logistic sigmoid. */
	double g(double x) {
		return sigmoid(x);
	}

	/** Logistic sigmoid: maps any real x into ]0;1[. */
	double sigmoid(double x) {
		return 1.0 / (1.0 +  (Math.exp(-x)));
	}

	/**
	 * Fully connects this neuron to every neuron in the given (previous)
	 * layer and indexes each new connection by its source neuron's id.
	 */
	public void addInConnectionsS(ArrayList<Neuron> inNeurons) {
		for (int k = 0; k < inNeurons.size(); k++) {
			Neuron source = inNeurons.get(k);
			Connection link = new Connection(source, this);
			Inconnections.add(link);
			connectionLookup.put(source.id, link);
		}
	}

	/** Looks up the incoming connection whose source neuron has the given id. */
	public Connection getConnection(int neuronIndex) {
		return connectionLookup.get(neuronIndex);
	}

	/** Registers an externally created incoming connection. */
	public void addInConnection(Connection con) {
		Inconnections.add(con);
	}

	/** Wires this neuron to the shared bias neuron and remembers the link. */
	public void addBiasConnection(Neuron n) {
		Connection link = new Connection(n, this);
		biasConnection = link;
		Inconnections.add(link);
	}

	/** @return the live list of incoming connections (not a copy) */
	public ArrayList<Connection> getAllInConnections() {
		return Inconnections;
	}

	public double getBias() {
		return bias;
	}

	public double getOutput() {
		return output;
	}

	public void setOutput(double o) {
		output = o;
	}
}

Connection.java

/**
 * A directed, weighted edge between two neurons. Tracks the current weight,
 * the latest weight delta, and the previous delta (used for momentum).
 */
public class Connection {
	double weight = 0;
	double prevDeltaWeight = 0; // for momentum
	double deltaWeight = 0;

	final Neuron leftNeuron;
	final Neuron rightNeuron;
	static int counter = 0; // next id to hand out
	final public int id; // auto increment, starts at 0

	/**
	 * @param fromN source neuron (left end)
	 * @param toN   destination neuron (right end)
	 */
	public Connection(Neuron fromN, Neuron toN) {
		this.leftNeuron = fromN;
		this.rightNeuron = toN;
		this.id = counter++;
	}

	public double getWeight() {
		return this.weight;
	}

	public void setWeight(double w) {
		this.weight = w;
	}

	/**
	 * Records a new delta; the old delta is kept as prevDeltaWeight so the
	 * momentum term in backpropagation can reuse it.
	 */
	public void setDeltaWeight(double w) {
		this.prevDeltaWeight = this.deltaWeight;
		this.deltaWeight = w;
	}

	public double getPrevDeltaWeight() {
		return this.prevDeltaWeight;
	}

	public Neuron getFromNeuron() {
		return this.leftNeuron;
	}

	public Neuron getToNeuron() {
		return this.rightNeuron;
	}
}

Result:

NN example with xor training
INPUTS: 1.0 1.0 EXPECTED: 0.0 ACTUAL: 0.01978605453528619
INPUTS: 1.0 0.0 EXPECTED: 1.0 ACTUAL: 0.9836399078122067
INPUTS: 0.0 1.0 EXPECTED: 1.0 ACTUAL: 0.9831299198563257
INPUTS: 0.0 0.0 EXPECTED: 0.0 ACTUAL: 0.007493158102140806

Sum of squared errors = 9.99887592864088E-4
##### EPOCH 5689

Advertisements

Written by kunuk Nykjaer

October 11, 2010 at 12:23 am

39 Responses

Subscribe to comments with RSS.

  1. wah, sangat membantu.. terimakasih contoh programnya πŸ™‚

    milolipop

    June 9, 2011 at 4:47 am

  2. hi i m looking program java for back propagation algorithm

    pwor love2012w

    August 23, 2011 at 1:38 am

    • This is a NN with back propagation with one hidden layer

      kunuk Nykjaer

      May 10, 2013 at 7:19 pm

      • do you have an example about handwriting offline recognition using back propagation neural network in java

        celia

        April 25, 2016 at 12:09 pm

  3. I wonder, if this neural network could be trained first and then I would just give the inputs (without expected outputs) and the neural network would calculate the outputs by herself? What would have to be changed in code?
    Thank you in advance.

    Saso

    June 14, 2012 at 1:59 am

    • In the NeuralNetwork class there is this line
      final boolean isTrained = false;

      set it to true and it will use the weight values and calculate the value. It will run 1 epoch (no training).

      kunuk Nykjaer

      May 10, 2013 at 7:23 pm

      • I am able to understand the biology, layout and interpretation of the code but if possible could you please upload a detailed explanation for the code from a programming perspective

        Rahul

        July 18, 2013 at 1:34 pm

  4. this is training… what about testing? how to use the same updated weight to test actual data?

    mohdyasseribz

    June 16, 2012 at 12:02 pm

    • Preferable you save the trained weight values (e.g. to a csv file or serialized file). Then you load and initialize your network with your weight values and test the network that it calculates correctly for a given test data set.

      kunuk Nykjaer

      May 10, 2013 at 6:47 pm

  5. Thank you Mr.Kunuk Nykjaer.Saya permission to use your source code.but I want to ask why you use epsilon ? i don’t know what for epsiolon function .

    manuturfilio

    July 4, 2012 at 11:22 am

    • When you use floating-point numbers you might have floating-point precision issues. Epsilon is a very small value used to avoid precision issues when comparing numbers. Here I use it as “if it is almost 1 then it is 1”

      kunuk Nykjaer

      March 1, 2013 at 4:09 pm

      • I have some questions about feed forward with back propagation.
        First, how will I use my features extracted during image processing so that the neural network will be trained? Which part of code will the features will be feed to the neural network? I have more or less 15 features to be extracted from an image.

        jralvero

        March 5, 2013 at 4:11 pm

  6. Hello there!
    I am planning to use your codes for my study which involves image processing and artificial neural.
    Is it okay if I will use these codes?
    Do these codes implement feed forward neural network type with back propagation supervised learning method?

    jralvero

    March 1, 2013 at 5:58 am

    • You are welcome to use it as you like. However this is just for learning. If you want to use NN for real purposes then maybe you should use a framework like Encog – https://github.com/encog/encog-java-core/downloads

      kunuk Nykjaer

      March 1, 2013 at 4:06 pm

      • I have some questions about feed forward with back propagation.
        First, how will I use my features extracted during image processing so that the neural network will be trained? Which part of code will the features will be feed to the neural network? I have more or less 15 features to be extracted from an image.

        jralvero

        March 5, 2013 at 4:32 pm

  7. Hi, I have a question. If I have more or less 200 samples for training, what would be the learning rate and momentum to be used in order to have a successful training? Do these parameters affect the speed of the training?

    Gi

    March 5, 2013 at 6:08 pm

    • I would say you will have to experiment with the values. I was told long time a general good number for those are 0.9 for learning rate and 0.7 for momentum.

      kunuk Nykjaer

      May 10, 2013 at 7:16 pm

  8. What is leftneuron and rightneuron??

    Lich Duong

    May 4, 2013 at 1:48 am

    • It is input, output

      Visualize the network going from left to right.
      A connection has an input and output named leftNeuron and rightNeuron

      kunuk Nykjaer

      May 4, 2013 at 8:32 pm

  9. Hello, thankyou so much for posting this article, It really helps me. I’m beginner in Java and neural network, I tried your codes in Netbeans to see the result, but Netbeans detected some errors in neuralnetwork.java, especially for this syntax >> ArrayList connections = n.getAllInConnections();
    I read neuron.java, the method (getAllInConnections) returns an arraylist with the type is connection, but you make the value returns to neuron arraylist and there is double w as the value for getWeight method from Connection.java, but It is called by neuron variable, and I guess, it isn’t inheritor class of connection.java. I need your help for solving these.

    Thankyou in Advance πŸ™‚

    Nadia Oyin

    May 8, 2013 at 12:20 pm

    • I used Eclipse for this. Make sure the class and file names are matching. E.g. Neuron.java is the Neuron class.

      You could add package ann; at the top in all 3 files and put them i a folder called ann.
      I have not tried it with Netbeans.

      kunuk Nykjaer

      May 10, 2013 at 6:56 pm

      • Thanks for suggestion, I tried with Eclipse and it worked. I have a question about the backpropagation concept. I’ve trained a network with Encog then I pulled the weight (input to hidden layer and hidden to output layer) and I found that the number of weight didn’t follow the number of entered pattern, for example I have 340 pattern. Each pattern consist of 9 inputs. I set the number of node in hidden layer is 1, the number of hidden layer is only 1, and the outputs are 2. If it were in encog, the number of weight were only 12. If I used my own network, it were 12 x 340.

        I read some books and found that there wasn’t an explicit explaination about this problem. So I beg you to give some opinions about it.

        Thanks in advanced.

        Nadia Oyin

        May 20, 2013 at 8:22 pm

  10. hy , i look for your code its nice , but i have some questions , for the training cycle , you use batch training or online training , it means you update the weights after the epoch or after each input line ? and thank you !

    Simo

    May 14, 2013 at 1:13 pm

    • In the NeuralNetwork.java code from line 208 to 223 is the epoch cycle.

      For each input I activate, then calculate the error and then update the weights (back propagation).
      The method applyBackpropagation on line 222 updates the weights

      That would be online training or after each input line.

      kunuk Nykjaer

      May 14, 2013 at 9:21 pm

  11. sir i want use this backpropogation techniqye in to predict disease infected people after 10 days,can i use this code or else …which technique can i use in neural network

    guna

    May 22, 2013 at 7:28 am

  12. Hi,
    Thank u for posting such code, it’s beautiful. I ask about wich values can serve for testing. I made an on line testing and i took the last weights updated to test my neural network, but it gives me always one output value however i changed the inputs. do u have any idea where i missed?

    Marwa

    May 27, 2013 at 2:31 pm

  13. Hey,

    Could you explain to me what the “id” in Connection and Neuron is for?

    Thanks in advance

    BP

    June 26, 2013 at 4:30 pm

    • They are references in the NN-system and are mainly used for saving/loading the weights once the NN has been trained.

      Once you have trained your NN you probably want to save the NN weights.
      Then next time you have a new dataset you want to use the NN on, you can load your trained NN weights and run the NN on the new dataset.

      kunuk Nykjaer

      June 28, 2013 at 10:51 am

  14. You have put in a nice work here, I am new to neural networks. So a basic question I have 25 feature vectors, so are these feature vectors be used as individual input units ?

    sonal

    September 16, 2013 at 8:10 am

  15. so tall !! but, thanks

    Rabeya Moni

    February 24, 2014 at 7:28 am

    • Hi,
      Nice work!
      We would like to know please if your implementation for the backpropagation part is specific for the xor problem?
      The issue is that we try to implement neural-net for the MNIST problem, so we took a look on your code and even tried to run it with a slightly minor changes but unfortunately it didn’t converge as we expected…
      Could you please refer us to some good explanation with detailed equations on how to back propagate?
      Thank you.

      tiferet55@gmail.com

      April 18, 2014 at 1:25 pm

      • This is not xor specific, should be able to handle all NN-tasks. This code is a basic example of NN with a hidden layer. You could try to use a NN-library such as Encog and compare the result. Encog has code samples and supports various NN-setup. Encog is open source. https://github.com/encog

        kunuk Nykjaer

        April 19, 2014 at 7:22 pm

  16. and does your code support more than one hidden layer?

    tiferet55@gmail.com

    April 18, 2014 at 5:11 pm

  17. Can I use this code for comparing images?

    Jins

    April 28, 2014 at 6:11 pm

  18. Does your code support more than 4 inputs ? I want to create neural network for alphabet recognition with 11 inputs for each alphabet, and 5 times sampling (so there is 11*26*5 inputs, right? or it is just 11*26 inputs ?) with 26 outputs. But I have no idea how to implement your code to them. May you help me?

    Annisa Kartikasari

    August 8, 2015 at 4:31 am

  19. hii, i have used your code im my project , I am applying real numbers as input and so as the output to my network with 2 input layers , 4 hidden layers and 1 output layer.
    For e.g. input=[{2,3}] expectedOutput = [4]
    Result i am getting is Inputs: 2.0 3.0 Expected output : 4.0 Actual Output : 0.99999999999
    Everytime i apply some value greater than 1 or even negative numbers its not getting the expected output not even close to that. Any help would be appreciated.
    Thanks in advance πŸ™‚

    Muniraj Tomar

    September 29, 2015 at 11:21 am

  20. I want to ask something, I need more than one hidden layer for increasing my accuracy. But in your code is just one hidden layer, how making a hidden layer are more than one?

    subhan nooriansyah

    November 26, 2015 at 3:23 am

  21. hii, I have a small doubt about updating the weights.
    In neural network back propagation algorithm before 1 EPOCH it selects the weights randomly, After 1 EPOCH it updates the weights.
    My doubt is how to update the weights in testing time for that code

    satya

    May 16, 2016 at 9:28 am


Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: