Software Programming

Kunuk Nykjaer

Backtracking – Subset sum with C#

Here’s an example of a backtracking algorithm implemented in C#.
This solves the Subset sum problem.

Subset sum problem is NP-complete and depending on your data set the running time can be very slow.

program.cs

```using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;

/// <summary>
/// Author: Kunuk Nykjaer
/// Backtracking Algorithm in C#
/// Subset sum
///
/// With list numbers and a given sum value
/// Find subset of list which adds up to the given sum value.
/// </summary>
public class Program
{
    static readonly Action<object> Write = Console.Write;

    // Composition root: wires config, data and the solver together.
    static void Main(string[] args)
    {
        var stopwatch = Stopwatch.StartNew(); // time the whole run

        // -----------------------------------------
        // ** INSERT YOUR DATA HERE **
        var config = new Config
        {
            MaxSolutions = 1,          // restrict result count
            ShowProgress = true,       // show backtrack progress
            ShowStatus = true,         // show iteration progress
            DisplaySolutions = true,   // Show solution list
            DetectCodeError = false,   // slower running time if true
        };

        var set = new List<float> { -7, -3, -2, 5, 8 }; // the input set
        var sum = 0;                                    // target subset sum
        // ** END INSERT DATA **
        // -----------------------------------------

        var data = new Data(set, sum, config);      // init data
        var solver = new Backtrack(Write, data);    // init backtrack algo

        solver.Run();     // find solutions
        stopwatch.Stop(); // algo done, stop time
        solver.Show();    // show results

        Write(string.Format("\nSec: {0}\nPress a key to exit\n",
            stopwatch.ElapsedMilliseconds / 1000d));
    }
}

// Runtime options for the subset-sum backtracking run.
class Config
{
    public int MaxSolutions { get; set; }      // stop after this many solutions
    public bool ShowProgress { get; set; }     // print each backtrack step
    public bool ShowStatus { get; set; }       // print periodic status lines
    public bool DisplaySolutions { get; set; } // collect and print solution lists
    public bool DetectCodeError { get; set; }  // extra sanity checks (slower)

    public Config()
    {
        MaxSolutions = 1; // default: stop at the first solution
    }
}

// Subset-sum solver using the classic backtracking scheme
// (reject / update / accept / first / next / undo).
class Backtrack
{
    private readonly Action<object> _write; // output sink (injected)
    private readonly Data _p;               // shared problem and search state
    private bool _maxSolutionsReachedExitBt;

    public Backtrack(Action<object> write, Data P)
    {
        _write = write;
        _p = P;
    }

    // Roll the candidate back out of the working data (the backtrack step).
    void UndoUpdate(Data P, Candidate c, int depth)
    {
        P.Remove(c);
        if (P.Config.ShowProgress) { Print(P, c, depth); }
    }

    // Apply the candidate: include its number in the working sum and path.
    void Update(Data P, Candidate c, int depth)
    {
        P.Add(c); // mirror of UndoUpdate's P.Remove(c)
        if (P.Config.ShowProgress) { Print(P, c, depth); }
    }

    /*
    wikipedia:
    http://en.wikipedia.org/wiki/Backtracking

    procedure bt(P,c)
    if reject(P,c) then return
    update(P,c) // update data
    if accept(P,c) then output(P,c)
    s ← first(P,c)
    while s ≠ Λ do
    bt(P,s)
    s ← next(P,s)
    end while
    undoUpdate(P,c) // backtracking starts here
    end procedure
    */

    // O(?) — worst case explores every subset of the input.
    void Bt(Data P, Candidate c, int depth)
    {
        if (Reject(P, c)) { return; }

        // Update data
        Update(P, c, depth);

        if (P.Config.ShowStatus) { ShowStatus(P, c); } // print status

        if (Accept(P, c)) { SaveResult(P, c); }

        // Custom exit: abandon the search immediately. Working data is not
        // rolled back here, but the whole algorithm terminates anyway.
        if (_maxSolutionsReachedExitBt) { return; }

        var s = First(P, c);

        while (s != null)
        {
            Bt(P, s, depth + 1);
            s = Next(P, s);
        }

        if (_maxSolutionsReachedExitBt) { return; } // custom exit

        // Leaf, dead end, backtrack, roll back data
        UndoUpdate(P, c, depth);
    }

    // O(1) — accept when the working sum matches the target (float tolerance).
    bool Accept(Data P, Candidate c)
    {
        const float epsilon = 0.000001f;
        return Math.Abs(P.Sum - P.WorkingSum) < epsilon;
    }

    // O(1) — prune before the candidate is applied.
    bool Reject(Data P, Candidate c)
    {
        if (_maxSolutionsReachedExitBt) { return true; }

        if (c == null) { throw new ArgumentNullException("c"); }

        var i = P.Numbers[c.Index];

        // Optimization vs. iterate all combinations:
        // the list is sorted, so once a non-negative number overshoots the
        // target, every remaining sibling overshoots too.
        if (i >= 0 && i + P.WorkingSum > P.Sum) { return true; }

        return false;
    }

    // Start a backtracking search from every possible first element.
    public void Run()
    {
        var setsum = _p.Numbers.Sum();
        if (_p.Sum > setsum)
        {
            _write(string.Format("Sum: {0} is bigger than set sum {1}, no solutions exists\n",
                _p.Sum, setsum));
            return;
        }

        for (int i = 0; i < _p.Numbers.Length; i++)
        {
            Bt(_p, new Candidate { Index = i }, 1);
        }
    }

    // Print configuration, input and collected results.
    public void Show()
    {
        _write(string.Format("Max solutions: {0}\n", _p.Config.MaxSolutions));
        _write(string.Format("Sum: {0}\n", _p.Sum));
        _write(string.Format("Set:\n"));
        _p.Numbers.ToList().ForEach(a => _write(a + ", "));

        if (_p.Config.DisplaySolutions)
        {
            _write(string.Format("\n\n*** Solutions:\n\n"));
            _p.SolutionsList.ForEach(_write);
        }

        _write(string.Format("\nUnique Paths Tried: {0}\n", _p.PathsTried));
        _write(string.Format("Solutions count: {0}\n", _p.Solutions));
    }

    // O(n) where n is path length
    void SaveResult(Data P, Candidate c)
    {
        P.Solutions++;

        if (P.Config.DisplaySolutions)
        {
            // Only save result data if to be displayed
            var list = P.Stack.ToList();
            list.Reverse(); // stack enumerates newest-first; restore insertion order

            string numbers = list.Aggregate("", (a, b) => a + (P.Numbers[b] + ", "));
            string indexes = list.Aggregate("", (a, b) => a + (b + ", "));

            // Fix: the formatted solution was built but never stored,
            // so Show() had nothing to display.
            P.SolutionsList.Add(string.Format("Numbers: {0}\nIndexes: {1}\n\n",
                numbers, indexes));
        }

        if (P.Solutions >= P.Config.MaxSolutions) { _maxSolutionsReachedExitBt = true; }
    }

    // First valid child from path
    // O(1)
    Candidate First(Data P, Candidate c)
    {
        int j = c.Index + 1;
        if (j >= P.Numbers.Length) { return null; }

        return new Candidate { Index = j };
    }

    // Next sibling from current leaf
    // O(1)
    Candidate Next(Data P, Candidate c)
    {
        int j = c.Index + 1;
        if (j >= P.Numbers.Length) { return null; }

        return new Candidate { Index = j };
    }

    // Trace one backtracking step.
    void Print(Data P, Candidate c, int depth)
    {
        _write(string.Format("path: {0}  \tsum: {1}  \tdepth: {2}\n",
            P.Path.Get(), P.WorkingSum, depth));
    }

    // Periodic progress line (every 500,000 attempted paths).
    void ShowStatus(Data P, Candidate c)
    {
        // Debug
        if (P.PathsTried % 500000 == 0)
        {
            _write(string.Format("Paths tried: {0:n0}  \tSolutions: {1}  \tPath: {2}\n",
                P.PathsTried, P.Solutions, P.Path.Get()));
        }
    }
}

// A candidate choice: the index of a number under consideration.
class Candidate
{
    public int Index { get; set; }

    public override string ToString()
    {
        return Index.ToString();
    }
}

// Problem definition plus the mutable backtracking state.
class Data
{
    // Init data
    public Config Config;
    public float Sum { get; private set; }
    public float[] Numbers { get; private set; }

    // Backtracking data
    public bool[] Path { get; private set; }      // Path[i] true when Numbers[i] is in the current subset
    public float WorkingSum { get; private set; } // sum of the current subset
    public Stack<int> Stack = new Stack<int>();   // chosen indexes, top = most recent

    // Result data
    public int PathsTried;
    public List<string> SolutionsList = new List<string>();
    public int Solutions { get; set; }

    // Only used for Detect code error
    public HashSet<string> SetPathsTried = new HashSet<string>();

    public Data(List<float> numbers, float sum, Config config)
    {
        numbers.Sort(); // sort is used for reject condition in bt algo
        Numbers = numbers.ToArray();
        Path = new bool[Numbers.Length];
        Sum = sum;
        Config = config;
    }

    // Undo the most recent Add. O(1)
    public void Remove(Candidate c)
    {
        if (c == null) { throw new ArgumentNullException("c"); }

        var i = Stack.Pop();
        UpdatePath(false, i);
        WorkingSum -= Numbers[i];
    }

    // Apply a candidate to the working state. O(1) (or O(n) if detectCodeError)
    // Fix: the method signature was missing from the listing.
    public void Add(Candidate c)
    {
        if (c == null) { throw new ArgumentNullException("c"); }

        UpdatePath(true, c.Index);
        WorkingSum += Numbers[c.Index];
        Stack.Push(c.Index);
        PathsTried++;

        if (Config.DetectCodeError)
        {
            var path = Stack.GetString(); // O(n) where n is stack count
            if (SetPathsTried.Contains(path)) // O(1)
            {
                throw new ApplicationException(string.Format("Path already visited: {0}", path));
            }
            // Fix: record the path, otherwise a revisit could never be detected.
            SetPathsTried.Add(path);
        }
    }

    // Flip one Path flag, guarding against double-apply / double-undo. O(1)
    void UpdatePath(bool value, int index)
    {
        if (Path[index] == value)
        {
            throw new ApplicationException(string.Format("Path already set for index: {0}, value: {1}",
                index, Numbers[index]));
        }
        Path[index] = value;
    }
}

// Rendering helpers for backtracking paths.
static class Extensions
{
    // "1->2->3->" style rendering of any sequence.
    public static string GetString<T>(this IEnumerable<T> arr)
    {
        return string.Concat(arr.Select(item => string.Format("{0}->", item)));
    }

    // Bit-string rendering of a bool sequence: '1' for true, '0' for false.
    public static string Get(this IEnumerable<bool> arr)
    {
        return string.Concat(arr.Select(b => b ? "1" : "0"));
    }
}
```

Examples of usage and results

Set including negative numbers

Find subset sum from set with negative and positive numbers.

Configuration is set as:
– MaxSolutions = 3,
– ShowProgress = false,
– ShowStatus = true,
– DisplaySolutions = true

```Max solutions: 3
Sum: 100
Set:
-10, -1, 1, 2, 5, 10, 20, 20, 30, 40, 50,

*** Solutions:

Numbers: -10, -1, 1, 10, 20, 30, 50,
Indexes: 0, 1, 2, 5, 6, 8, 10,

Numbers: -10, -1, 1, 10, 20, 30, 50,
Indexes: 0, 1, 2, 5, 7, 8, 10,

Numbers: -10, -1, 1, 20, 20, 30, 40,
Indexes: 0, 1, 2, 6, 7, 8, 9,

Unique Paths Tried: 162
Solutions count: 3

Sec: 0.007
```

Solutions count only

Here we are interested in the number of subsets matching the sum value.

Configuration is set as:
– MaxSolutions = int.MaxValue,
– ShowProgress = false,
– ShowStatus = true,
– DisplaySolutions = false

```Paths tried: 500.000    Solutions: 14810   Path: 110111010011000000110000000
Paths tried: 1.000.000  Solutions: 28743   Path: 101111011001111010011000000
Paths tried: 1.500.000  Solutions: 42862   Path: 100111010101001000001000000
Paths tried: 2.000.000  Solutions: 56129   Path: 100000000000000101110100000
Paths tried: 2.500.000  Solutions: 70302   Path: 010111110101101001100000000
Paths tried: 3.000.000  Solutions: 83851   Path: 010000010100111010010010000
Paths tried: 3.500.000  Solutions: 97535   Path: 001000100111100010000000000
Paths tried: 4.000.000  Solutions: 110731  Path: 000001011010011000111000000
Max solutions: 2147483647
Sum: 500
Set:
10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40, 40, 45, 45, 50,
50, 55, 60, 65, 100, 200, 300, 400, 500, 600,

Unique Paths Tried: 4114568
Solutions count: 113508

Sec: 3.639
```

Floating numbers

```Max solutions: 5
Sum: 3
Set:
-1, 0.1, 0.2, 0.5, 1, 1.5, 2, 2.5, 2.9,

*** Solutions:

Numbers: -1, 0.1, 1, 2.9,
Indexes: 0, 1, 4, 8,

Numbers: -1, 0.5, 1, 2.5,
Indexes: 0, 3, 4, 7,

Numbers: -1, 0.5, 1.5, 2,
Indexes: 0, 3, 5, 6,

Numbers: -1, 1.5, 2.5,
Indexes: 0, 5, 7,

Numbers: 0.1, 2.9,
Indexes: 1, 8,

Unique Paths Tried: 98
Solutions count: 5
```

Written by kunuk Nykjaer

December 25, 2012 at 2:22 pm

Posted in Algorithm, Csharp

Tagged with , ,

Backtracking – Knight’s Tour with C#

Here’s an example of backtracking algorithm implemented in C#.
This solves the Knight’s Tour

program.cs

```using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// Author: Kunuk Nykjaer
/// Backtracking Algorithm in C#
/// Knight's Tour on NxN board
/// </summary>
public class Program
{
    static readonly Action<object> Write = Console.Write;

    // Composition root: wires board, config, data and the solver together.
    static void Main(string[] args)
    {
        var stopwatch = Stopwatch.StartNew(); // time the whole run

        var board = new Board(5, Write); // NxN board, here N = 5

        var config = new Config
        {
            MaxSolutions = 1,
            ShowBoardProgress = false,
            ShowStatus = true,
            DetectCodeError = false
        };

        var data = new Data(config);                     // init data
        var solver = new Backtrack(board, Write, data);  // init backtrack algo

        solver.Run();     // find solutions
        stopwatch.Stop(); // algo done, stop time
        solver.Show();    // show results

        Write(string.Format("\nSec: {0}\nPress a key to exit\n",
            stopwatch.ElapsedMilliseconds / 1000d));
    }
}

// Knight's Tour solver using the classic backtracking scheme.
class Backtrack
{
    private readonly Board _board;          // board geometry, moves and printing
    private readonly Action<object> _write; // output sink (injected)
    private readonly Data _p;               // shared search state

    public Backtrack(Board board, Action<object> write, Data P)
    {
        _board = board;
        _write = write;
        _p = P;
    }

    // Roll back the last move (the backtrack step).
    void UndoLastMove(Data P)
    {
        P.RemoveLast();

        // Debug
        if (!P.ExitBacktrack && P.Config.ShowBoardProgress)
        {
            _board.Print(P.Path);
        }
    }

    // Apply move c from the current cell: append the resulting cell to the path.
    void UpdatePath(Data P, Vector c)
    {
        if (c == null)
        {
            throw new ArgumentNullException("c");
        }

        Cell last = P.Path.Last();
        P.Add(last.AddVector(c)); // Reject() has already validated this move

        // Debug
        if (!P.ExitBacktrack && P.Config.ShowBoardProgress)
        {
            _board.Print(P.Path);
        }
    }

    /*
    wikipedia:
    http://en.wikipedia.org/wiki/Backtracking

    procedure bt(P,c)
    if reject(P,c) then return
    if accept(P,c) then output(P,c)
    s ← first(P,c)
    while s ≠ Λ do
    bt(P,s)
    s ← next(P,s)
    end while

    // backtrack starts here
    end procedure
    */

    // O(?) — worst case explores every knight path on the board.
    void Bt(Data P, Vector c)
    {
        if (Reject(P, c)) { return; }

        UpdatePath(P, c);

        // Debug
        DebugEpoch(P, c);

        if (Accept(P)) { SaveResult(P); }

        var s = First(P);

        while (s != null)
        {
            Bt(P, s);
            s = Next(P, s);
        }

        // Leaf, dead end, backtrack, roll back path data
        UndoLastMove(P);
    }

    // O(1) — a tour is complete when every cell has been visited.
    bool Accept(Data P)
    {
        return P.Path.Count >= _board.Size * _board.Size;
    }

    // O(1) — prune moves that leave the board or revisit a cell.
    bool Reject(Data P, Vector c)
    {
        // Custom early exit
        if (P.ExitBacktrack)
        {
            return true;
        }

        if (c == null)
        {
            return true;
        }

        Cell last = P.Path.Last(); // last visit
        Cell candidate = last.AddVector(c); // consider candidate
        if (P.Visited.Contains(candidate))
        {
            return true;
        }

        return !_board.IsInsideBoard(candidate);
    }

    // Periodic progress line (every 200,000 attempted paths).
    void DebugEpoch(Data P, Vector c)
    {
        if (P.ExitBacktrack)
        {
            return;
        }

        // Debug
        if (P.Config.ShowStatus && P.PathTried.Count % 200000 == 0)
        {
            _write(string.Format("PathTried: {0:n0}, Solutions: {1}\n",
                P.PathTried.Count, P.Solutions.Count));
        }
    }

    // Start the search from the top-left corner.
    public void Run()
    {
        Cell init = new Cell(0, 0); // init pos

        // Fix: seed the path with the starting cell, otherwise
        // First() calls Path.Last() on an empty list and throws.
        _p.Add(init);

        Vector first = First(_p);
        Bt(_p, first);
    }

    // Print all stored tours and search statistics.
    public void Show()
    {
        _write("\n-----------------\n");
        foreach (var s in _p.Solutions)
        {
            _board.Print(s);
            _write("-----------------\n");
        }
        _write(string.Format("\nPaths tried: {0:n0}\n", _p.PathTried.Count));
        _write(string.Format("\nSolutions found: {0}\n", _p.Solutions.Count));
    }

    // O(n) where n is path length
    void SaveResult(Data P)
    {
        // Fix: snapshot the completed tour — the path itself is mutated as the
        // search unwinds, and Show() prints from P.Solutions.
        P.Solutions.Add(new List<Cell>(P.Path));

        if (P.Solutions.Count >= P.Config.MaxSolutions)
        {
            P.ExitBacktrack = true;
        }
    }

    // First valid child from path
    // O(1)
    Vector First(Data P)
    {
        Cell last = P.Path.Last();
        IList<Vector> moves = _board.GetValidMoves(last);

        // Fix: the return was missing from the listing.
        return moves.Count == 0 ? null : moves[0];
    }

    // Next sibling from current leaf
    // O(1)
    Vector Next(Data P, Vector c)
    {
        return _board.Next(c);
    }
}

// 2D integer coordinate with value equality (used as HashSet keys).
class XY
{
    public int X { get; set; }
    public int Y { get; set; }

    public XY(int x, int y)
    {
        X = x;
        Y = y;
    }

    // Fix: the hash body was missing from the listing. Combine both
    // coordinates; unchecked so overflow wraps instead of throwing.
    public override int GetHashCode()
    {
        unchecked { return X * 31 + Y; }
    }

    public override string ToString()
    {
        return string.Format("[{0},{1}]", X, Y);
    }

    public override bool Equals(object obj)
    {
        var other = obj as XY;
        if (other == null) { return false; }

        // Fix: the original compared hash codes, which can collide for
        // different coordinates; compare the fields themselves.
        return X == other.X && Y == other.Y;
    }
}

// Position on board
class Cell : XY
{
    // A board position is just an XY point; value equality comes from XY.
    public Cell(int x, int y) : base(x, y) { }
}

// Move definition
class Vector : XY
{
    // Global creation counter: each Vector gets a sequential Id.
    // Board.Moves relies on this (Moves[i].Id == i) so Board.Next can step to
    // the sibling move via Moves[m.Id + 1]. NOTE(review): creating Vectors
    // anywhere before/besides the Moves array would break that mapping — confirm.
    private static int _count;
    public int Id { get; private set; }
    public Vector(int x, int y)
        : base(x, y)
    {
        Id = _count++;
    }
}

// NxN board: knight move definitions, move iteration and display printing.
class Board
{
    // Define move of Knight (declaration order fixes Vector Ids 0..7,
    // which Next() depends on)
    private static readonly Vector[] Moves = new[] {
        new Vector(2, 1),
        new Vector(1, 2),
        new Vector(-1, 2),
        new Vector(-2, 1),
        new Vector(-2, -1),
        new Vector(-1, -2),
        new Vector(1, -2),
        new Vector(2, -1)
    };

    // Fix: these field declarations were missing from the listing.
    private readonly Action<object> _write; // output sink (injected)
    private readonly string[][] _board;     // display buffer, printing only

    public int Size { get; private set; }

    public Board(int size, Action<object> write)
    {
        Size = size;
        _write = write;

        // Board is only used for printing display
        _board = new string[Size][];

        for (var i = 0; i < _board.Length; i++)
        {
            _board[i] = new string[Size];
        }
    }

    // Reset every display cell to the 'unvisited' marker.
    private void CleanBoard()
    {
        foreach (var row in _board)
        {
            for (var j = 0; j < row.Length; j++)
            {
                row[j] = "--";
            }
        }
    }

    // Write each path cell's visit order (00, 01, ...) into the display buffer.
    private void UpdateBoard(IList<Cell> path)
    {
        CleanBoard();
        for (var i = 0; i < path.Count; i++)
        {
            Cell p = path[i];
            _board[p.Y][p.X] = i.ToString("00");
        }
    }

    // Next movement: the sibling move after m, or null when exhausted.
    public Vector Next(Vector m)
    {
        if (m.Id >= Moves.Length - 1)
        {
            return null;
        }
        return Moves[m.Id + 1];
    }

    // O(1) because Moves.Count is always 8
    // Get valid moves within board
    // Fix: the method body was missing from the listing.
    public List<Vector> GetValidMoves(Cell c)
    {
        var valid = new List<Vector>();
        foreach (var m in Moves)
        {
            if (IsInsideBoard(c.AddVector(m))) { valid.Add(m); }
        }
        return valid;
    }

    // O(1)
    public bool IsInsideBoard(Cell c)
    {
        return c.X >= 0 && c.X < Size && c.Y >= 0 && c.Y < Size;
    }

    // Print the path as a grid of visit numbers.
    public void Print(List<Cell> p)
    {
        UpdateBoard(p);

        _write("\n");
        foreach (var y in _board)
        {
            foreach (var x in y)
            {
                _write(x + " ");
            }
            _write("\n\n");
        }
    }
}

// Mutable search state for the Knight's Tour backtracking run.
class Data
{
    public bool ExitBacktrack { get; set; } // set when enough solutions found
    public Config Config;

    public readonly List<Cell> Path = new List<Cell>();              // current tour, in visit order
    public readonly HashSet<Cell> Visited = new HashSet<Cell>();     // O(1) membership for Path
    public readonly List<List<Cell>> Solutions = new List<List<Cell>>();
    public readonly HashSet<string> PathTried = new HashSet<string>(); // Count doubles as attempt counter

    public Data(Config config)
    {
        Config = config;
    }

    // O(1)
    public void RemoveLast()
    {
        Cell cell = Path.Last(); // O(1)
        if (Config.DetectCodeError && !Visited.Contains(cell))
        {
            throw new ApplicationException("Can't remove not added cell");
        }
        Visited.Remove(cell); // O(1)
        Path.RemoveAt(Path.Count - 1); // O(1) remove last
    }

    // O(1) or O(n)
    // Fix: the method signature and the Path/Visited updates were missing
    // from the listing; RemoveLast is the mirror of this method.
    public void Add(Cell cell)
    {
        if (Config.DetectCodeError && Visited.Contains(cell))
        {
            throw new ApplicationException("Can't add duplicate visited cell");
        }

        Path.Add(cell);    // O(1) append
        Visited.Add(cell); // O(1)

        // O(n)
        // Catch error in code, but expensive to use in running time
        if (Config.DetectCodeError)
        {
            var path = Path.GetString();
            if (PathTried.Contains(path))
            {
                throw new Exception("Error, path already tried");
            }
            PathTried.Add(path);
        }
        //O(1)
        else
        {
            // NOTE(review): only PathTried.Count is read for statistics, so a
            // cheap unique token keeps the counter correct without the O(n)
            // full-path string — confirm against the original listing.
            PathTried.Add(PathTried.Count.ToString());
        }
    }
}

// Options for the Knight's Tour search.
class Config
{
    public int MaxSolutions { get; set; }       // stop after this many complete tours
    public bool ShowBoardProgress { get; set; } // print the board after each move/undo
    public bool ShowStatus { get; set; }        // periodic progress line while searching
    public bool DetectCodeError { get; set; } // cost running time if true
}

// Helpers: path rendering and cell arithmetic.
static class Extensions
{
    // "->a->b" rendering of a sequence (an arrow precedes every element).
    public static string GetString<T>(this IEnumerable<T> arr)
    {
        return string.Concat(arr.Select(item => "->" + item));
    }

    // The cell reached by applying move vector v to cell c.
    public static Cell AddVector(this Cell c, Vector v)
    {
        return new Cell(c.X + v.X, c.Y + v.Y);
    }
}
```

Examples:

Knight’s Tour on a 5 x 5 board

```00 13 18 07 24

05 08 01 12 17

14 19 06 23 02

09 04 21 16 11

20 15 10 03 22
-----------------

Paths tried: 8,840
Sec: 0.202
```

below 1 sec. for first result.

Knight’s Tour on a 6 x 6 board

```00 33 16 31 22 35

15 24 01 34 17 30

06 11 32 23 02 21

25 14 07 20 29 18

10 05 12 27 08 03

13 26 09 04 19 28
-----------------

Paths tried: 248,169
Sec: 6.024
```

About 6 seconds for first result.

Knight’s Tour on a 7 x 7 board

```00 31 38 27 40 23 48

37 28 01 24 21 26 41

30 19 32 39 02 47 22

07 36 29 20 25 42 03

18 15 08 33 44 11 46

35 06 13 16 09 04 43

14 17 34 05 12 45 10
-----------------

Paths tried: 7,151,179
Sec: 155.465
```

About 2.5 minutes for first result.

Knight’s Tour on a 8 x 8 board

```00 37 58 35 42 47 56 51

59 34 01 48 57 50 43 46

38 31 36 41 02 45 52 55

33 60 39 26 49 54 03 44

30 09 32 61 40 25 22 53

17 62 27 10 23 20 13 04

08 29 18 15 06 11 24 21

63 16 07 28 19 14 05 12
-----------------

Paths tried: 8,250,733
Sec: 186.499
```

About 3 minutes for first result.

Written by kunuk Nykjaer

December 23, 2012 at 10:47 am

Posted in Algorithm, Csharp

Tagged with , ,

Running time analysis – hashset used for running time optimization – C# example

Imagine you want to find the list of distinct numbers of all possible duplicate numbers
from two lists with unknown length and unknown content.
You want to do it as fast as possible.

e.g.
List a = [5,2,2,6,4,3,3]
List b = [2,2,2,1,3,4,3,5]
You should find [2,3,4,5]

How would you do it?

Do you use the usual nested for loops iteration?
If you don’t mind using some extra memory you could use the hashset trick.
See the code below for the examples.

If you worry about the memory you could sort the lists
and iterate them which takes in total O(nlogn) + O(mlogm). That is not covered here.

The hashset technique takes O(n+m) which is faster.

If you don’t know what a hashset is, here are some wiki readings:
http://en.wikipedia.org/wiki/Hash_table
http://en.wikipedia.org/wiki/Set_(abstract_data_type)

The code shows two styles: one that runs O(n*m) and one that runs O(n+m) with different implementations.
I will also use C# Linq to show you how it can perform badly when you don’t use it correctly (ver2 vs. ver4 in the code).

Source code and demo.
Program.cs

```using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// Author: Kunuk Nykjaer
/// </summary>
public class Program
{
    const int Lengh = 1000;  // list length (n and m)
    const int Range = 10000; // values drawn from [0, Range)
    static readonly Random Random = new Random();
    static readonly List<int> List1 = new List<int>();
    static readonly List<int> List2 = new List<int>();

    public static void Main(string[] args)
    {
        // Fill both lists with random values.
        // Fix: the loop bodies were missing from the listing.
        for (int i = 0; i < Lengh; i++)
        {
            List1.Add(Random.Next(Range));
        }
        for (int i = 0; i < Lengh; i++)
        {
            List2.Add(Random.Next(Range));
        }

        Console.WriteLine("n and m has length: {0}", Lengh);

        RunningTime1();
        RunningTime2();
        RunningTime3();
        RunningTime4();
        RunningTime5();
    }

    // Distinct common values via nested loops: O(n*m).
    private static void RunningTime1()
    {
        Console.WriteLine("1.  O(n*m)  Classic nested for loops");
        var sw = new Stopwatch();
        sw.Start(); //O(1)
        var equals = new HashSet<int>();
        foreach (var i in List1) //O(n)
        {
            foreach (var j in List2) //O(m)
            {
                if (i == j) //O(1)
                {
                    equals.Add(i); //O(1)
                }
            }
        }
        sw.Stop();

        Console.WriteLine("Elapsed msec: {0}, equals len: {1}",
            sw.ElapsedMilliseconds, equals.Count);
    }

    // Same result via a naive Linq cross join: still O(n*m).
    private static void RunningTime2()
    {
        Console.WriteLine("2.  O(n*m)  Linq version quck style");
        var sw = new Stopwatch();
        sw.Start();

        // from... +    Distinct
        //O(n*m)   +   O((n+m)/2)    )
        var equals = (from i in List1 from j in List2 where i == j select i).
            Distinct().ToList();

        sw.Stop();

        Console.WriteLine("Elapsed msec: {0}, equals len: {1}",
            sw.ElapsedMilliseconds, equals.Count);
    }

    // Hashset membership + Linq filter: O(n+m).
    private static void RunningTime3()
    {
        Console.WriteLine("3.  O(n+m)  Linq");
        var sw = new Stopwatch();
        sw.Start();

        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)

        foreach (var i in List1) //O(n)
        {
            set.Add(i); //O(1)
        }
        foreach (int i in List2.Where(set.Contains)) //O(m)
        {
            equals.Add(i); //O(1)
        }

        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}",
            sw.ElapsedMilliseconds, equals.Count);
    }

    // Hashset membership with plain foreach loops: O(n+m).
    private static void RunningTime4()
    {
        Console.WriteLine("4.  O(n+m) foreach loop");

        var sw = new Stopwatch();
        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)
        sw.Start();
        foreach (var i in List1) //O(n)
        {
            set.Add(i); //O(1)
        }
        foreach (int i in List2) //O(m)
        {
            if (set.Contains(i)) //O(1)
            {
                equals.Add(i); //O(1)
            }
        }

        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}",
            sw.ElapsedMilliseconds, equals.Count);
    }

    // Hashset membership with index-based for loops: O(n+m).
    private static void RunningTime5()
    {
        Console.WriteLine("5.  O(n+m) for loop");

        var sw = new Stopwatch();
        sw.Start();

        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)

        int count1 = List1.Count; //O(1)
        for (int i = 0; i < count1; i++) //O(n)
        {
            set.Add(List1[i]); //O(1)
        }

        int count2 = List2.Count; //O(1)
        for (int i = 0; i < count2; i++) //O(m)
        {
            int j = List2[i]; //O(1)
            if (set.Contains(j)) //O(1)
            {
                equals.Add(j); //O(1)
            }
        }

        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}",
            sw.ElapsedMilliseconds, equals.Count);
    }
}
```

O(n*m) and O(n+m)

```n and m has length: 10000
1.  O(n*m)  Classic nested for loops
Elapsed msec: 1582, equals len: 900
2.  O(n*m)  Linq version quck style
Elapsed msec: 8446, equals len: 900
3.  O(n+m)  Linq
Elapsed msec: 8, equals len: 900
4.  O(n+m) foreach loop
Elapsed msec: 1, equals len: 900
5.  O(n+m) for loop
Elapsed msec: 1, equals len: 900
```

Ver2 is a bad implementation using Linq. Linq can cost you running time if you use it poorly.

O(n*m) and O(n+m)

```n and m has length: 20000
1.  O(n*m)  Classic nested for loops
Elapsed msec: 6285, equals len: 3234
2.  O(n*m)  Linq version quck style
Elapsed msec: 34932, equals len: 3234
3.  O(n+m)  Linq
Elapsed msec: 10, equals len: 3234
4.  O(n+m) foreach loop
Elapsed msec: 4, equals len: 3234
5.  O(n+m) for loop
Elapsed msec: 3, equals len: 3234
```

The poor version takes 30 sec. while the hashset versions take at most 10 msec.

Here is a test with a larger list; the O(n*m) versions were too slow to include.
O(n+m) with large list

```n and m has length: 1000000
3.  O(n+m)  Linq
Elapsed msec: 505, equals len: 90594
4.  O(n+m) foreach loop
Elapsed msec: 537, equals len: 90594
5.  O(n+m) for loop
Elapsed msec: 499, equals len: 90594
```

Even for large list, the time is below 1 sec.
For nested loops it would have taken 12 days!
(source: Algorithm Design book by Jon Kleinberg and Éva Tardos, chapter 2, O(n*n) where n=1.000.000)

The lesson to take is:
When you are about to code something using nested for loops, first check whether the lists are large; if they are, consider using this trick when fast running time is important.

Written by kunuk Nykjaer

March 4, 2012 at 12:52 am

Q-learning Library example with C#

This is a follow up of my previous post – Q-learning example with Java
https://kunuk.wordpress.com/2010/09/24/q-learning/

Imagine you had a robot, just like Bender here and you want it to learn to navigate correctly to the destination (the beer). Your possible moves are move up, right, down or left from any room. And you want some kind of automatic system that gives you the solution. This is what this library can do for you. You configure the code with possible states, possible actions and the action outcomes. E.g. when you move right from the bedroom the action result is Bender has moved to the Hallway and if you move left from the bedroom then Bender hits the wall and stays in the bedroom. You will need to set up a reward value when Bender reaches the beer e.g. 100 points. That’s it!
No logic programming from your part, just some configuration.

DISCLAIMER:

In my previous post I made a quick example in Java and this time I will use C#.
This version will support non-deterministic action outcomes and I will include examples of how to use this library. The code examples are included at the bottom. You need QLearning.cs to run these examples.

• Path finding Bender
• Path finding demo (same example as in my previous post)
• Rock paper scissors demo

Path finding Bender example
Path finding Bender result:

```** Q-Learning structure **
State Bedroom
Action up
ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
Action right
ActionResult State Hallway, Prob. 1, Reward 0, PrevState Bedroom, QE 81
Action down
ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
Action left
ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
State Hallway
Action up
ActionResult State Hallway, Prob. 1, Reward 0, PrevState Hallway, QE 81
Action right
ActionResult State Hallway, Prob. 1, Reward 0, PrevState Hallway, QE 81
Action down
ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Hallway, QE 90
Action left
ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Hallway, QE 72.9
State Stairwell
Action up
ActionResult State Hallway, Prob. 1, Reward 0, PrevState Stairwell, QE 81
Action right
ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Stairwell, QE 90
Action down
ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Stairwell, QE 90
Action left
ActionResult State Kitchen, Prob. 1, Reward 100, PrevState Stairwell, QE 100

** Show Policy **
From state Bedroom do action right, max QEstimated is 81
From state Hallway do action down, max QEstimated is 90
From state Stairwell do action left, max QEstimated is 100
```

Bender has learned the fastest way to get some beer. He knows which direction to take from any room.

Path finding example

Path finding result:

```** Q-Learning structure **
State A
Action from_A_to_B
ActionResult State B, Prob. 1, Reward 0, PrevState A, QE 90
Action from_A_to_D
ActionResult State D, Prob. 1, Reward 0, PrevState A, QE 72.9
State B
Action from_B_to_A
ActionResult State A, Prob. 1, Reward 0, PrevState B, QE 81
Action from_B_to_C
ActionResult State C, Prob. 1, Reward 100, PrevState B, QE 100
Action from_B_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState B, QE 81
State C
Action from_C_to_C
ActionResult State C, Prob. 1, Reward 0, PrevState C, QE 0
State D
Action from_D_to_A
ActionResult State A, Prob. 1, Reward 0, PrevState D, QE 81
Action from_D_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState D, QE 81
State E
Action from_E_to_B
ActionResult State B, Prob. 1, Reward 0, PrevState E, QE 90
Action from_E_to_D
ActionResult State D, Prob. 1, Reward 0, PrevState E, QE 72.9
Action from_E_to_F
ActionResult State F, Prob. 1, Reward 0, PrevState E, QE 90
State F
Action from_F_to_C
ActionResult State C, Prob. 1, Reward 100, PrevState F, QE 100
Action from_F_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState F, QE 81

** Show Policy **
From state A do action from_A_to_B, max QEstimated is 90
From state B do action from_B_to_C, max QEstimated is 100
From state C do action from_C_to_C, max QEstimated is 0
From state D do action from_D_to_A, max QEstimated is 81
From state E do action from_E_to_B, max QEstimated is 90
From state F do action from_F_to_C, max QEstimated is 100
```

The results are exactly the same as in my previous post.

Rock paper scissors result:

```** Q-Learning structure **
State Begin
Action from_Begin_to_Rock
ActionResult State Rock, Prob. 0.2, Reward 0, PrevState Begin, QE 0
ActionResult State Paper, Prob. 0.5, Reward -10, PrevState Begin, QE -0.91
ActionResult State Scissor, Prob. 0.3, Reward 100, PrevState Begin, QE 4.11

Action from_Begin_to_Paper
ActionResult State Rock, Prob. 0.2, Reward 100, PrevState Begin, QE 2.44
ActionResult State Paper, Prob. 0.5, Reward 0, PrevState Begin, QE 0
ActionResult State Scissor, Prob. 0.3, Reward -10, PrevState Begin, QE -0.41
Action from_Begin_to_Scissor
ActionResult State Rock, Prob. 0.2, Reward -10, PrevState Begin, QE -0.24
ActionResult State Paper, Prob. 0.5, Reward 100, PrevState Begin, QE 9.09
ActionResult State Scissor, Prob. 0.3, Reward 0, PrevState Begin, QE 0

** Show Policy **
From state Begin do action from_Begin_to_Scissor, max QEstimated is 9.09

** Opponent style **
style is rock 0.2 paper 0.5 scissor 0.3
```

Here the opponent’s style is to pick paper 50% of the time. The robot correctly learns to pick scissors as the action policy for maximizing the reward outcome.

In the advanced version of path finding, there is a hill from state B to C.
Whenever the robot tries to climb the hill from state B to C, there is a 10% chance of success and a 90% chance it will slide back to state A.

```** Q-Learning structure **
State A
Action from_A_to_B
ActionResult State B, Prob. 1, Reward 0, PrevState A, QE 72.9
Action from_A_to_D
ActionResult State D, Prob. 1, Reward 0, PrevState A, QE 72.9
State B
Action from_B_to_A
ActionResult State A, Prob. 1, Reward 0, PrevState B, QE 65.61
Action from_B_to_C
ActionResult State C, Prob. 0.1, Reward 100, PrevState B, QE 1.1
ActionResult State A, Prob. 0.9, Reward 0, PrevState B, QE 31.08
Action from_B_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState B, QE 81
State C
Action from_C_to_C
ActionResult State C, Prob. 1, Reward 0, PrevState C, QE 0
State D
Action from_D_to_A
ActionResult State A, Prob. 1, Reward 0, PrevState D, QE 65.61
Action from_D_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState D, QE 81
State E
Action from_E_to_B
ActionResult State B, Prob. 1, Reward 0, PrevState E, QE 72.9
Action from_E_to_D
ActionResult State D, Prob. 1, Reward 0, PrevState E, QE 72.9
Action from_E_to_F
ActionResult State F, Prob. 1, Reward 0, PrevState E, QE 90
State F
Action from_F_to_C
ActionResult State C, Prob. 1, Reward 100, PrevState F, QE 100
Action from_F_to_E
ActionResult State E, Prob. 1, Reward 0, PrevState F, QE 81

** Show Policy **
From state A do action from_A_to_B, max QEstimated is 72.9
From state B do action from_B_to_E, max QEstimated is 81
From state C do action from_C_to_C, max QEstimated is 0
From state D do action from_D_to_E, max QEstimated is 81
From state E do action from_E_to_F, max QEstimated is 90
From state F do action from_F_to_C, max QEstimated is 100
```

The robot has learned to take the action ‘go to state E’ instead of ‘go to state C’ when it is in state B.
10% of success is not good enough and the robot wisely chooses a longer but more rewarding path.

The data structure is as follows:
The QLearning algorithm has multiple states, for every state there are multiple possible actions and for each action there are multiple possible action outcomes.

PathFindingBenderDemo.cs

```using System;
using QLearningFramework;

namespace ConsoleQLearning
{
// Q-learning demo: Bender learns the shortest route to the beer (Kitchen).
class PathFindingBenderDemo
{
    // ----------- Insert the state names here -----------
    // Rooms the robot can be in; Kitchen is the goal (reward 100 below).
    internal enum StateNameEnum
    {
        Bedroom, Hallway, Stairwell, Kitchen
    }
    // ----------- End Insert the state names here -------

    static void Main(string[] args)
    {
        DateTime starttime = DateTime.Now;

        PathFinding();

        // Pretty() and EnumToString() are extensions from QLearning.cs (not shown here).
        double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
        Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty()); Console.ReadKey();
    }

    // Builds the state/action graph, trains, then prints structure and policy.
    // NOTE(review): this listing appears to have lines stripped out — 'state' is
    // read before it is ever assigned, and most actions never get an
    // AddActionResult for their 'stateNameNext'. Expect something like
    // q.AddState(state = new QState(stateName, q)) per state and a
    // fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0))
    // per action — confirm against the original QLearning.cs examples.
    static void PathFinding()
    {
        QLearning q = new QLearning();
        QAction fromTo;
        QState state;
        string stateName;
        string stateNameNext;

        // ----------- Begin Insert the path setup here -----------
        // insert the end states here, e.g. goal state

        // State Bedroom
        stateName = StateNameEnum.Bedroom.EnumToString();
        // action up (hits the wall: stays in Bedroom)
        stateNameNext = StateNameEnum.Bedroom.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
        // action right (moves to the Hallway)
        stateNameNext = StateNameEnum.Hallway.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
        // action down
        stateNameNext = StateNameEnum.Bedroom.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
        // action left
        stateNameNext = StateNameEnum.Bedroom.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));

        // State Hallway
        stateName = StateNameEnum.Hallway.EnumToString();
        // action up
        stateNameNext = StateNameEnum.Hallway.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
        // action right
        stateNameNext = StateNameEnum.Hallway.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
        // action down (moves to the Stairwell)
        stateNameNext = StateNameEnum.Stairwell.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
        // action left (back to the Bedroom)
        stateNameNext = StateNameEnum.Bedroom.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));

        // State Stairwell
        stateName = StateNameEnum.Stairwell.EnumToString();
        // action up
        stateNameNext = StateNameEnum.Hallway.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
        // action right
        stateNameNext = StateNameEnum.Stairwell.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
        // action down
        stateNameNext = StateNameEnum.Stairwell.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
        // action left (reaches the Kitchen: reward 100, the goal)
        stateNameNext = StateNameEnum.Kitchen.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0){Reward = 100});
        // ----------- End Insert the path setup here -----------

        q.RunTraining();
        q.PrintQLearningStructure();
        q.ShowPolicy();
    }
}
}
```

PathFindingDemo.cs

```using System;
using QLearningFramework;

namespace ConsoleQLearning
{
class PathFindingDemo
{
    // ----------- Insert the state names here -----------
    internal enum StateNameEnum
    {
        A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U,
        V, W, X, Y, Z
    }
    // ----------- End Insert the state names here -------

    static void Main(string[] args)
    {
        DateTime starttime = DateTime.Now;

        PathFinding();

        double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
        Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty());
        Console.ReadKey();
    }

    /// <summary>
    /// Deterministic path-finding demo: every action outcome has
    /// probability 1. C is the goal state (reward 100 when entered).
    /// </summary>
    static void PathFinding()
    {
        QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9,
            MaxExploreStepsWithinOneEpisode = 1000 };

        QAction fromTo;
        QState state;
        string stateName;
        string stateNameNext;

        // ----------- Begin Insert the path setup here -----------
        // insert the end states here, e.g. goal state
        // FIX(review): goal-state registration was lost in extraction.
        q.EndStates.Add(StateNameEnum.C.EnumToString());

        // State A
        stateName = StateNameEnum.A.EnumToString();
        // FIX(review): 'state' was used without ever being assigned; each state
        // must be created and registered before actions are attached to it.
        state = q.AddState(new QState(stateName, q));
        // action A -> B
        stateNameNext = StateNameEnum.B.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        // FIX(review): each action needs at least one outcome or
        // ValidateActionsResultProbability throws (default: probability 1, reward 0).
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // action A -> D
        stateNameNext = StateNameEnum.D.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State B
        stateName = StateNameEnum.B.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // B -> A
        stateNameNext = StateNameEnum.A.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // B -> C
        stateNameNext = StateNameEnum.C.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability (kept from the original)
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
        // B -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State C
        stateName = StateNameEnum.C.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // C -> C (self loop on the goal state)
        stateNameNext = stateName;
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State D
        stateName = StateNameEnum.D.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // D -> A
        stateNameNext = StateNameEnum.A.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // D -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State E
        stateName = StateNameEnum.E.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // E -> B
        stateNameNext = StateNameEnum.B.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // E -> D
        stateNameNext = StateNameEnum.D.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // E -> F
        stateNameNext = StateNameEnum.F.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State F
        stateName = StateNameEnum.F.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // F -> C (rewarded outcome, kept from the original)
        stateNameNext = StateNameEnum.C.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
        // F -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // ----------- End Insert the path setup here -----------

        q.RunTraining();
        q.PrintQLearningStructure();
        q.ShowPolicy();
    }
}
}
```

```using System;
using QLearningFramework;

namespace ConsoleQLearning
{
// NOTE(review): the class declaration line was lost in extraction; the name
// below is reconstructed. This is the stochastic variant of PathFindingDemo
// (B -> C succeeds only 10% of the time) that produces the policy shown at
// the top of the article.
class PathFindingStochasticDemo
{
    // ----------- Insert the state names here -----------
    internal enum StateNameEnum
    {
        A, B, C, D, E, F
    }
    // ----------- End Insert the state names here -------

    static void Main(string[] args)
    {
        DateTime starttime = DateTime.Now;

        PathFinding();

        double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
        Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty());
        Console.ReadKey();
    }

    /// <summary>
    /// Stochastic path-finding demo. The shortcut B -> C only reaches the
    /// goal C with probability 0.1, so the learned policy prefers the longer
    /// route B -> E -> F -> C.
    /// </summary>
    static void PathFinding()
    {
        QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9,
            MaxExploreStepsWithinOneEpisode = 1000 };

        QAction fromTo;
        QState state;
        string stateName;
        string stateNameNext;

        // ----------- Begin Insert the path setup here -----------
        // insert the end states here, e.g. goal state
        // FIX(review): goal-state registration was lost in extraction.
        q.EndStates.Add(StateNameEnum.C.EnumToString());

        // State A
        stateName = StateNameEnum.A.EnumToString();
        // FIX(review): 'state' was used without ever being assigned; each state
        // must be created and registered before actions are attached to it.
        state = q.AddState(new QState(stateName, q));
        // action A -> B
        stateNameNext = StateNameEnum.B.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        // FIX(review): each action needs at least one outcome or
        // ValidateActionsResultProbability throws (default: probability 1, reward 0).
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // action A -> D
        stateNameNext = StateNameEnum.D.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State B
        stateName = StateNameEnum.B.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // B -> A
        stateNameNext = StateNameEnum.A.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // B -> C
        stateNameNext = StateNameEnum.C.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability: only 10% chance of reaching C (original line)
        fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.C.EnumToString(), 0.1) { Reward = 100 });
        // FIX(review): outcome probabilities must sum to ~1
        // (ValidateActionsResultProbability, epsilon 0.1); the complementary
        // 90% stay-in-B outcome was lost in extraction and is reconstructed.
        fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.B.EnumToString(), 0.9));
        // B -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State C
        stateName = StateNameEnum.C.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // C -> C (self loop on the goal state)
        stateNameNext = stateName;
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State D
        stateName = StateNameEnum.D.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // D -> A
        stateNameNext = StateNameEnum.A.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // D -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State E
        stateName = StateNameEnum.E.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // E -> B
        stateNameNext = StateNameEnum.B.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // E -> D
        stateNameNext = StateNameEnum.D.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));
        // E -> F
        stateNameNext = StateNameEnum.F.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // State F
        stateName = StateNameEnum.F.EnumToString();
        state = q.AddState(new QState(stateName, q));
        // F -> C (rewarded outcome, kept from the original)
        stateNameNext = StateNameEnum.C.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
        // F -> E
        stateNameNext = StateNameEnum.E.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext));

        // ----------- End Insert the path setup here -----------

        q.RunTraining();
        q.PrintQLearningStructure();
        q.ShowPolicy();
    }
}
}
```

RockPaperScissorsDemo.cs

```using System;
using System.Collections.Generic;
using QLearningFramework;

namespace ConsoleQLearning
{
class RockPaperScissorsDemo
{
    // ----------- Insert the state names here -----------
    internal enum StateNameEnum
    {
        Begin, Rock, Paper, Scissor
    }
    // ----------- End Insert the state names here -------

    static void Main(string[] args)
    {
        DateTime starttime = DateTime.Now;

        RockPaperScissors();

        double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
        Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty());
        Console.ReadKey();
    }

    /// <summary>
    /// Trains against a randomly chosen opponent style (fixed rock/paper/
    /// scissor probabilities). Each action's three outcomes are weighted by
    /// the opponent's hand probabilities; rewards: win 100, draw 0, lose -10.
    /// </summary>
    static void RockPaperScissors()
    {
        QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9,
            MaxExploreStepsWithinOneEpisode = 1000 };

        var opponentStyles = new List<double[]>();
        // rock paper scissor probability styles
        opponentStyles.Add(new[] { 0.5, 0.3, 0.2 });
        opponentStyles.Add(new[] { 0.2, 0.5, 0.3 });
        opponentStyles.Add(new[] { 0.1, 0.1, 0.8 });
        int index = new Random().Next(opponentStyles.Count);
        var opponent = opponentStyles[index];

        // opponent probability pick
        double rockOpponent = opponent[0];
        double paperOpponent = opponent[1];
        double scissorOpponent = opponent[2];

        QAction fromTo;
        QState state;
        string stateName;
        string stateNameNext;

        // ----------- Begin Insert the path setup here -----------

        // insert the end states here
        // FIX(review): end-state registration was lost in extraction; one
        // played hand ends the episode.
        q.EndStates.Add(StateNameEnum.Rock.EnumToString());
        q.EndStates.Add(StateNameEnum.Paper.EnumToString());
        q.EndStates.Add(StateNameEnum.Scissor.EnumToString());

        // State Begin
        stateName = StateNameEnum.Begin.EnumToString();
        // FIX(review): 'state' was used without ever being assigned.
        state = q.AddState(new QState(stateName, q));

        // action Rock
        stateNameNext = StateNameEnum.Rock.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        // FIX(review): the 'fromTo.AddActionResult(new QActionResult(fromTo,
        // stateNameNext, ...' prefixes of these lines were lost in extraction;
        // only the probability argument and reward survived. Reconstructed.
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, rockOpponent) { Reward = 0 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, paperOpponent) { Reward = -10 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, scissorOpponent) { Reward = 100 });

        // action paper
        stateNameNext = StateNameEnum.Paper.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, rockOpponent) { Reward = 100 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, paperOpponent) { Reward = 0 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, scissorOpponent) { Reward = -10 });

        // action scissor
        stateNameNext = StateNameEnum.Scissor.EnumToString();
        state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
        // action outcome probability
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, rockOpponent) { Reward = -10 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, paperOpponent) { Reward = 100 });
        fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, scissorOpponent) { Reward = 0 });
        // ----------- End Insert the path setup here -----------

        q.RunTraining();
        q.PrintQLearningStructure();
        q.ShowPolicy();

        Console.WriteLine("\n** Opponent style **");
        Console.WriteLine(string.Format("style is rock {0} paper {1} scissor {2}",
            opponent[0].Pretty(), opponent[1].Pretty(), opponent[2].Pretty()));
    }
}
}
```

QLearning.cs

```using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace QLearningFramework
{
/// <summary>
/// Version 0.1
/// Author: Kunuk Nykjaer
/// one to many relationships data structure
/// QLearning [1 -- *] State [1 -- *] Action [1 -- *] ActionResult
/// </summary>
class QLearning
{
    public List<QState> States { get; private set; }
    // Fast name -> state lookup used while walking episodes.
    public Dictionary<string, QState> StateLookup { get; private set; }

    // Alpha: learning rate; Gamma: discount factor (both expected in [0,1]).
    public double Alpha { get; internal set; }
    public double Gamma { get; internal set; }

    public HashSet<string> EndStates { get; private set; }
    public int MaxExploreStepsWithinOneEpisode { get; internal set; } //avoid infinite loop
    public bool ShowWarning { get; internal set; } // show runtime warnings regarding q-learning
    public int Episodes { get; internal set; }

    public QLearning()
    {
        States = new List<QState>();
        StateLookup = new Dictionary<string, QState>();
        EndStates = new HashSet<string>();

        // Default when not set
        MaxExploreStepsWithinOneEpisode = 1000;
        Episodes = 1000;
        Alpha = 0.1;
        Gamma = 0.9;
        ShowWarning = true;
    }

    // FIX(review): this member was reduced to a stray '{ }' pair by the text
    // extraction. Every demo calls q.AddState(...) and RunTraining reads
    // StateLookup, so the registration helper is reconstructed here.
    public QState AddState(QState state)
    {
        States.Add(state);
        StateLookup.Add(state.StateName, state);
        return state;
    }

    /// <summary>
    /// Runs the configured number of training episodes. Each episode starts
    /// in a random state and walks the graph until an end state is reached
    /// or MaxExploreStepsWithinOneEpisode is exceeded.
    /// </summary>
    public void RunTraining()
    {
        QMethod.Validate(this);

        /*
        For each episode: Select random initial state
        Do while not reach goal state
        Select one among all possible actions for the current state
        Using this possible action, consider to go to the next state
        Get maximum Q value of this next state based on all possible actions
        Set the next state as the current state
        */

        // For each episode
        var rand = new Random();
        long maxloopEventCount = 0;

        // Train episodes
        for (long i = 0; i < Episodes; i++)
        {
            long maxloop = 0;
            // Select random initial state
            int stateIndex = rand.Next(States.Count);
            QState state = States[stateIndex];
            QAction action = null;
            do
            {
                if (++maxloop > MaxExploreStepsWithinOneEpisode)
                {
                    if (ShowWarning)
                    {
                        // typo fixed in the warning text: "to" -> "too"
                        string msg = string.Format(
                            "{0} !! MAXLOOP state: {1} action: {2}, {3} endstate is too difficult to reach?",
                            ++maxloopEventCount, state, action, "maybe your path setup is wrong or the ");
                        QMethod.Log(msg);
                    }

                    break;
                }

                // no actions, skip this state
                if (state.Actions.Count == 0)
                    break;

                // Selection strategy is random based on probability
                int index = rand.Next(state.Actions.Count);
                action = state.Actions[index];

                // Using this possible action, consider to go to the next state
                // Pick random Action outcome
                QActionResult nextStateResult = action.PickActionByProbability();
                string nextStateName = nextStateResult.StateName;

                double q = nextStateResult.QEstimated;
                double r = nextStateResult.Reward;
                double maxQ = MaxQ(nextStateName);

                // Q(s,a)= Q(s,a) + alpha * (R(s,a) + gamma * Max(next state, all actions) - Q(s,a))
                double value = q + Alpha * (r + Gamma * maxQ - q); // q-learning
                nextStateResult.QValue = value; // update

                // is end state go to next episode
                if (EndStates.Contains(nextStateResult.StateName))
                    break;

                // Set the next state as the current state
                state = StateLookup[nextStateResult.StateName];

            } while (true);
        }
    }

    // Highest QEstimated across all outcomes of all actions leaving stateName;
    // returns 0 (with an optional warning) when the state is unknown or has
    // no outcomes.
    double MaxQ(string stateName)
    {
        const double defaultValue = 0;

        if (!StateLookup.ContainsKey(stateName))
            return defaultValue;

        QState state = StateLookup[stateName];
        var actionsFromState = state.Actions;
        double? maxValue = null;
        foreach (var nextState in actionsFromState)
        {
            foreach (var actionResult in nextState.ActionsResult)
            {
                double value = actionResult.QEstimated;
                if (value > maxValue || !maxValue.HasValue)
                    maxValue = value;
            }
        }

        // no update
        if (!maxValue.HasValue && ShowWarning)
            QMethod.Log(string.Format("Warning: No MaxQ value for stateName {0}",
                stateName));

        return maxValue.HasValue ? maxValue.Value : defaultValue;
    }

    // Dumps the full state/action/outcome tree to the console.
    public void PrintQLearningStructure()
    {
        Console.WriteLine("** Q-Learning structure **");
        foreach (QState state in States)
        {
            Console.WriteLine("State {0}", state.StateName);
            foreach (QAction action in state.Actions)
            {
                Console.WriteLine("  Action " + action.ActionName);
                Console.Write(action.GetActionResults());
            }
        }
        Console.WriteLine();
    }

    // Greedy policy: for each state print the action whose best outcome has
    // the highest QEstimated.
    public void ShowPolicy()
    {
        Console.WriteLine("** Show Policy **");
        foreach (QState state in States)
        {
            double max = Double.MinValue;
            string actionName = "nothing";
            foreach (QAction action in state.Actions)
            {
                foreach (QActionResult actionResult in action.ActionsResult)
                {
                    if (actionResult.QEstimated > max)
                    {
                        max = actionResult.QEstimated;
                        actionName = action.ActionName.ToString();
                    }
                }
            }

            Console.WriteLine(string.Format("From state {0} do action {1}, max QEstimated is {2}",
                state.StateName, actionName, max.Pretty()));
        }
    }
}

// A named state and the actions that can be taken from it.
class QState
{
    public string StateName { get; private set; }
    public List<QAction> Actions { get; private set; }

    // FIX(review): this member was reduced to a stray '{ }' pair by the text
    // extraction. The demos call state.AddAction(...), so the helper is
    // reconstructed here.
    public QAction AddAction(QAction action)
    {
        Actions.Add(action);
        return action;
    }

    // NOTE(review): the QLearning argument is unused in the visible code;
    // registration in the lookup table happens in QLearning.AddState.
    public QState(string stateName, QLearning q)
    {
        StateName = stateName;
        Actions = new List<QAction>();
    }

    public override string ToString()
    {
        return string.Format("StateName {0}", StateName);
    }
}

// An action leaving a state, holding its possible probabilistic outcomes.
class QAction
{
    private static readonly Random Rand = new Random();
    public QActionName ActionName { get; internal set; }
    public string CurrentState { get; private set; }
    public List<QActionResult> ActionsResult { get; private set; }

    // FIX(review): this member was reduced to a stray '{ }' pair by the text
    // extraction. The demos call fromTo.AddActionResult(...), so the helper
    // is reconstructed here.
    public QActionResult AddActionResult(QActionResult actionResult)
    {
        ActionsResult.Add(actionResult);
        return actionResult;
    }

    // One line per outcome, as used by QLearning.PrintQLearningStructure.
    public string GetActionResults()
    {
        var sb = new StringBuilder();
        foreach (QActionResult actionResult in ActionsResult)
            sb.AppendLine("     ActionResult " + actionResult);

        return sb.ToString();
    }

    public QAction(string currentState, QActionName actionName = null)
    {
        CurrentState = currentState;
        ActionsResult = new List<QActionResult>();
        ActionName = actionName;
    }

    // The sum of action outcomes must be close to 1
    public void ValidateActionsResultProbability()
    {
        const double epsilon = 0.1;

        if (ActionsResult.Count == 0)
            throw new ApplicationException(string.Format(
                "ValidateActionsResultProbability is invalid, no action results:\n {0}",
                this));

        double sum = ActionsResult.Sum(a => a.Probability);
        if (Math.Abs(1 - sum) > epsilon)
            throw new ApplicationException(string.Format(
                "ValidateActionsResultProbability is invalid:\n {0}", this));
    }

    // Roulette-wheel selection: pick one outcome weighted by its Probability.
    public QActionResult PickActionByProbability()
    {
        double d = Rand.NextDouble();
        double sum = 0;
        foreach (QActionResult actionResult in ActionsResult)
        {
            sum += actionResult.Probability;
            if (d <= sum)
                return actionResult;
        }

        // we might get here if sum probability is below 1.0 e.g. 0.99
        // and the d random value is 0.999
        if (ActionsResult.Count > 0)
            return ActionsResult.Last();

        throw new ApplicationException(string.Format("No PickAction result: {0}", this));
    }

    public override string ToString()
    {
        double sum = ActionsResult.Sum(a => a.Probability);
        return string.Format("ActionName {0} probability sum: {1} actionResultCount {2}",
            ActionName, sum, ActionsResult.Count);
    }
}

// One possible outcome of an action: the state reached, the probability of
// reaching it, its reward, and the learned Q value.
class QActionResult
{
    public string StateName { get; internal set; }
    public string PrevStateName { get; internal set; }
    public double QValue { get; internal set; } // Q value is stored here
    public double Probability { get; internal set; }
    public double Reward { get; internal set; }

    // Probability-weighted Q value, used when comparing outcomes.
    public double QEstimated
    {
        get { return Probability * QValue; }
    }

    public QActionResult(QAction action, string stateNameNext = null,
        double probability = 1, double reward = 0)
    {
        PrevStateName = action.CurrentState;
        StateName = stateNameNext;
        Probability = probability;
        Reward = reward;
    }

    public override string ToString()
    {
        string probability = Probability.Pretty();
        string estimate = QEstimated.Pretty();
        return string.Format("State {0}, Prob. {1}, Reward {2}, PrevState {3}, QE {4}",
            StateName, probability, Reward, PrevStateName, estimate);
    }
}

// Display name of an action: a single word ("up") or a from/to pair that
// renders as "from_X_to_Y".
public class QActionName
{
    public string From { get; private set; }
    public string To { get; private set; }

    public QActionName(string from, string to = null)
    {
        From = from;
        To = to;
    }

    public override string ToString()
    {
        return GetActionName();
    }

    // Single-word form when no destination was given, otherwise combined.
    public string GetActionName()
    {
        return To == null ? From : QMethod.ActionNameFromTo(From, To);
    }
}

// Shared helpers: logging, locale-stable number formatting, enum naming and
// structure validation.
static class QMethod
{
    public static void Log(string s)
    {
        Console.WriteLine(s);
    }

    public static readonly CultureInfo CultureEnUs = new CultureInfo("en-US");

    // Locale-stable formatting so printed doubles always use '.' decimals.
    public static string ToStringEnUs(this double d)
    {
        return d.ToString("G", CultureEnUs);
    }

    // FIX(review): the body of Pretty was lost in extraction, leaving a
    // compile error (not all code paths return a value). Reconstructed to
    // round to two decimals before formatting, matching sample output values
    // such as "72.9" and "81".
    public static string Pretty(this double d)
    {
        return Math.Round(d, 2).ToStringEnUs();
    }

    public static string ActionNameFromTo(string a, string b)
    {
        return string.Format("from_{0}_to_{1}", a, b);
    }

    // Enum value -> its declared name, e.g. StateNameEnum.A -> "A".
    public static string EnumToString<T>(this T type)
    {
        return Enum.GetName(typeof(T), type);
    }

    // Throws when d is not a valid probability in [0, 1].
    public static void ValidateRange(double d, string origin = null)
    {
        if (d < 0 || d > 1)
        {
            string s = origin ?? string.Empty;
            throw new ApplicationException(string.Format("ValidateRange error: {0} {1}", d, s));
        }
    }

    // Ensure every action in the structure has a valid outcome distribution.
    public static void Validate(QLearning q)
    {
        foreach (var state in q.States)
        {
            foreach (var action in state.Actions)
            {
                action.ValidateActionsResultProbability();
            }
        }
    }
}
}
```

Currently you have to know all the probability values in advance for the action outcomes, which are used for the Q-learning structure setup. The source code can easily be adapted if you need runtime knowledge of probability outcomes. Just add a probability function lookup and look at the Q-learning formula calculation area. Maybe I will look more into that at a later time.

Written by Kunuk Nykjaer

January 14, 2012 at 7:53 pm