Software Programming

Kunuk Nykjaer


Backtracking – Subset sum with C#


Here’s an example of a backtracking algorithm implemented in C#.
It solves the Subset sum problem.

The subset sum problem is NP-complete, and depending on your data set the running time can be very slow.
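For a sense of the search space, here is a minimal brute-force sketch for contrast (a hypothetical helper, not part of program.cs below; it assumes the same usings). It enumerates all 2^n subsets with bitmasks, which is exactly the work the backtracking version tries to prune away.

// Hypothetical brute-force contrast sketch, assuming:
// using System; using System.Collections.Generic;
static List<int[]> BruteForceSubsetSum(float[] numbers, float sum)
{
    var solutions = new List<int[]>();
    // All 2^n - 1 non-empty subsets; only feasible for small n
    for (int mask = 1; mask < (1 << numbers.Length); mask++)
    {
        float s = 0;
        var indexes = new List<int>();
        for (int i = 0; i < numbers.Length; i++)
        {
            if ((mask & (1 << i)) != 0) { s += numbers[i]; indexes.Add(i); }
        }
        if (Math.Abs(s - sum) < 0.000001f) { solutions.Add(indexes.ToArray()); }
    }
    return solutions;
}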



program.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;

/// <summary>
/// Author: Kunuk Nykjaer
/// Backtracking Algorithm in C#
/// Subset sum 
/// 
/// With list numbers and a given sum value
/// Find subset of list which adds up to the given sum value.
/// </summary>
public class Program
{
    static readonly Action<object> Write = Console.Write;
    static readonly Func<ConsoleKeyInfo> ReadKey = Console.ReadKey;

    // Composition root
    static void Main(string[] args)
    {
        Thread.CurrentThread.CurrentCulture = CultureInfo.GetCultureInfo("en-US");

        // Use timer for checking algo time
        var sw = new Stopwatch();
        sw.Start();


        // -----------------------------------------
        // ** INSERT YOUR DATA HERE **
        // Init config
        var config = new Config
        {
            MaxSolutions = 1,          // restrict result count
            ShowProgress = true,       // show backtrack progress
            ShowStatus = true,         // show iteration progress
            DisplaySolutions = true,   // Show solution list
            DetectCodeError = false,   // slower running time if true
        };

        // set
        var set = new List<float> { -7, -3, -2, 5, 8 };

        // subset sum
        var sum = 0;

        // ** END INSERT DATA **
        // -----------------------------------------


        // Init data
        var P = new Data(set, sum, config);

        // Init backtrack algo
        var bt = new Backtrack(Write, P);

        // Find solutions
        bt.Run();

        // Algo done, stop time
        sw.Stop();

        // Show results
        bt.Show();

        Write(string.Format("\nSec: {0}\nPress a key to exit\n",
            sw.ElapsedMilliseconds / 1000d));

        ReadKey();
    }
}

class Config
{
    public int MaxSolutions { get; set; }
    public bool ShowProgress { get; set; }
    public bool ShowStatus { get; set; }
    public bool DisplaySolutions { get; set; }
    public bool DetectCodeError { get; set; }
    public Config() { MaxSolutions = 1; }
}

class Backtrack
{
    readonly Action<object> _write;
    private readonly Data _p;
    private bool _maxSolutionsReachedExitBt;

    public Backtrack(Action<object> write, Data P)
    {
        _write = write;
        _p = P;
    }

    void UndoUpdate(Data P, Candidate c, int depth)
    {
        P.Remove(c);
        if (P.Config.ShowProgress) { Print(P, c, depth); }
    }

    void Update(Data P, Candidate c, int depth)
    {
        P.Add(c);
        if (P.Config.ShowProgress) { Print(P, c, depth); }
    }

    /*     
    wikipedia:
    http://en.wikipedia.org/wiki/Backtracking
     
    procedure bt(P,c)
        if reject(P,c) then return
        update(P,c) // update data
        if accept(P,c) then output(P,c)
        s ← first(P,c)
        while s ≠ Λ do
            bt(P,s)
            s ← next(P,s)
        end while 
        undoUpdate(P,c) // backtracking starts here
     end procedure
    */

    // O(?)
    void Bt(Data P, Candidate c, int depth)
    {
        if (Reject(P, c)) { return; }

        // Update data
        Update(P, c, depth);

        if (P.Config.ShowStatus) { ShowStatus(P, c); } // print status

        if (Accept(P, c)) { SaveResult(P, c); }

        if (_maxSolutionsReachedExitBt) { return; } // custom exit

        var s = First(P, c);

        while (s != null)
        {
            Bt(P, s, depth + 1);
            s = Next(P, s);
        }

        if (_maxSolutionsReachedExitBt) { return; } // custom exit

        // Leaf, dead end, backtrack, roll back data
        UndoUpdate(P, c, depth);
    }

    // O(1)
    bool Accept(Data P, Candidate c)
    {
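        // Floating-point sums accumulate rounding error, so compare within a tolerance rather than with ==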
        const float epsilon = 0.000001f;
        if (Math.Abs(P.Sum - P.WorkingSum) < epsilon) { return true; }

        return false;
    }

    // O(1)
    bool Reject(Data P, Candidate c)
    {
        if (_maxSolutionsReachedExitBt) { return true; }

        if (c == null) { throw new ArgumentNullException("c"); }

        var i = P.Numbers[c.Index];

        // Pruning: the list is sorted ascending, so once a non-negative
        // candidate overshoots the target sum, every later candidate will too
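        // e.g. sorted set {-7,-3,-2,5,8}, sum 0: if adding 5 overshoots, adding 8 will too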
        if (i >= 0 && i + P.WorkingSum > P.Sum){ return true; }

        return false;
    }

    public void Run()
    {
        // The largest achievable subset sum is the sum of the positive numbers
        var maxSubsetSum = _p.Numbers.Where(a => a > 0).Sum();
        if (_p.Sum > maxSubsetSum)
        {
            _write(string.Format("Sum: {0} is bigger than the largest possible subset sum {1}, no solution exists\n",
                _p.Sum, maxSubsetSum));
            return;
        }

        for (int i = 0; i < _p.Numbers.Length; i++)
        {
            Bt(_p, new Candidate { Index = i }, 1);
        }
    }

    public void Show()
    {
        _write(string.Format("Max solutions: {0}\n", _p.Config.MaxSolutions));
        _write(string.Format("Sum: {0}\n", _p.Sum));
        _write(string.Format("Set:\n"));
        _p.Numbers.ToList().ForEach(a => _write(a + ", "));

        if (_p.Config.DisplaySolutions)
        {
            _write(string.Format("\n\n*** Solutions:\n\n"));
            _p.SolutionsList.ForEach(_write);
        }

        _write(string.Format("\nUnique Paths Tried: {0}\n", _p.PathsTried));
        _write(string.Format("Solutions count: {0}\n", _p.Solutions));
    }

    // O(n) where n is path length
    void SaveResult(Data P, Candidate c)
    {
        P.Solutions++;

        if (P.Config.DisplaySolutions)
        {
            // Only build the result strings if they will be displayed
            var list = P.Stack.ToList();
            list.Reverse();

            string numbers = list.Aggregate("", (a, b) => a + (P.Numbers[b] + ", "));
            string indexes = list.Aggregate("", (a, b) => a + (b + ", "));
            P.SolutionsList.Add(string.Format("Numbers: {0}\nIndexes: {1}\n\n", numbers, indexes));
        }


        if (P.Solutions >= P.Config.MaxSolutions) { _maxSolutionsReachedExitBt = true; }
    }

    // First valid child from path
    // O(1)
    Candidate First(Data P, Candidate c)
    {
        int j = c.Index + 1;
        if (j >= P.Numbers.Length) { return null; }

        return new Candidate { Index = j };
    }

    // Next sibling from current leaf
    // O(1)
    Candidate Next(Data P, Candidate c)
    {       
        int j = c.Index + 1;
        if (j >= P.Numbers.Length) { return null; }

        return new Candidate { Index = j };
    }

    void Print(Data P, Candidate c, int depth)
    {
        _write(string.Format("path: {0}  \tsum: {1}  \tdepth: {2}\n",
            P.Path.Get(), P.WorkingSum, depth));
    }

    void ShowStatus(Data P, Candidate c)
    {
        // Debug
        if (P.PathsTried % 500000 == 0)
        {
            _write(string.Format("Paths tried: {0:n0}  \tSolutions: {1}  \tPath: {2}\n",
                P.PathsTried, P.Solutions, P.Path.Get()));
        }
    }
}

class Candidate
{
    public int Index { get; set; }    
    public override string ToString()
    {
        return string.Format("{0}", Index);
    }
}

class Data
{
    // Init data
    public Config Config;
    public float Sum { get; private set; }
    public float[] Numbers { get; private set; }

    // Backtracking data
    public bool[] Path { get; private set; }
    public float WorkingSum { get; private set; }
    public Stack<int> Stack = new Stack<int>();        

    // Result data
    public int PathsTried;
    public List<string> SolutionsList = new List<string>();
    public int Solutions { get; set; }

    // Only used for Detect code error
    public HashSet<string> SetPathsTried = new HashSet<string>();

    public Data(List<float> numbers, float sum, Config config)
    {
        numbers.Sort(); // sort is used for reject condition in bt algo
        Numbers = numbers.ToArray();
        Path = new bool[Numbers.Length];
        Sum = sum;
        Config = config;
    }

    // O(1)
    public void Remove(Candidate c)
    {
        if (c == null) { throw new ArgumentNullException("c"); }

        var i = Stack.Pop();
        UpdatePath(false, i);
        WorkingSum -= Numbers[i];        
    }

    // O(1) (or O(n) if detectCodeError)
    public void Add(Candidate c)
    {
        if (c == null) { throw new ArgumentNullException("c"); }

        UpdatePath(true, c.Index);
        WorkingSum += Numbers[c.Index];
        Stack.Push(c.Index);
        PathsTried++;

        if (Config.DetectCodeError)
        {
            var path = Stack.GetString(); // O(n) where n is stack count
            if (SetPathsTried.Contains(path)) // O(1)
            {
                throw new ApplicationException(string.Format("Path already visited: {0}", path));
            }
            SetPathsTried.Add(path);
        }
    }

    // O(1)
    void UpdatePath(bool value, int index)
    {
        if (Path[index] == value)
        {
            throw new ApplicationException(string.Format("Path already set for index: {0}, value: {1}",
                index, Numbers[index]));
        }
        Path[index] = value;
    }
}

static class Extensions
{
    public static string GetString<T>(this IEnumerable<T> arr)
    {
        return arr.Aggregate("", (a, b) => a + string.Format("{0}->", b));
    }
    public static string Get(this IEnumerable<bool> arr)
    {
        return arr.Aggregate("", (a, b) => a + (b ? "1" : "0"));
    }
}

Examples of usage and results

Set including negative numbers

Find subset sum from set with negative and positive numbers.

Configuration is set as:
– MaxSolutions = 3,
– ShowProgress = false,
– ShowStatus = true,
– DisplaySolutions = true

Max solutions: 3
Sum: 100
Set:
-10, -1, 1, 2, 5, 10, 20, 20, 30, 40, 50,

*** Solutions:

Numbers: -10, -1, 1, 10, 20, 30, 50,
Indexes: 0, 1, 2, 5, 6, 8, 10,

Numbers: -10, -1, 1, 10, 20, 30, 50,
Indexes: 0, 1, 2, 5, 7, 8, 10,

Numbers: -10, -1, 1, 20, 20, 30, 40,
Indexes: 0, 1, 2, 6, 7, 8, 9,

Unique Paths Tried: 162
Solutions count: 3

Sec: 0.007

Solutions count only

Here we are only interested in the number of subsets that match the sum value.

Configuration is set as:
– MaxSolutions = int.MaxValue,
– ShowProgress = false,
– ShowStatus = true,
– DisplaySolutions = false

Paths tried: 500.000    Solutions: 14810   Path: 110111010011000000110000000
Paths tried: 1.000.000  Solutions: 28743   Path: 101111011001111010011000000
Paths tried: 1.500.000  Solutions: 42862   Path: 100111010101001000001000000
Paths tried: 2.000.000  Solutions: 56129   Path: 100000000000000101110100000
Paths tried: 2.500.000  Solutions: 70302   Path: 010111110101101001100000000
Paths tried: 3.000.000  Solutions: 83851   Path: 010000010100111010010010000
Paths tried: 3.500.000  Solutions: 97535   Path: 001000100111100010000000000
Paths tried: 4.000.000  Solutions: 110731  Path: 000001011010011000111000000
Max solutions: 2147483647
Sum: 500
Set:
10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40, 40, 45, 45, 50, 
50, 55, 60, 65, 100, 200, 300, 400, 500, 600,

Unique Paths Tried: 4114568
Solutions count: 113508

Sec: 3.639

Floating-point numbers

Max solutions: 5
Sum: 3
Set:
-1, 0.1, 0.2, 0.5, 1, 1.5, 2, 2.5, 2.9,

*** Solutions:

Numbers: -1, 0.1, 1, 2.9,
Indexes: 0, 1, 4, 8,

Numbers: -1, 0.5, 1, 2.5,
Indexes: 0, 3, 4, 7,

Numbers: -1, 0.5, 1.5, 2,
Indexes: 0, 3, 5, 6,

Numbers: -1, 1.5, 2.5,
Indexes: 0, 5, 7,

Numbers: 0.1, 2.9,
Indexes: 1, 8,

Unique Paths Tried: 98
Solutions count: 5

Written by kunuk Nykjaer

December 25, 2012 at 2:22 pm

Posted in Algorithm, Csharp


Backtracking – Knight’s Tour with C#


Here’s an example of a backtracking algorithm implemented in C#.
It solves the Knight’s Tour problem.

program.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// Author: Kunuk Nykjaer
/// Backtracking Algorithm in C#
/// Knight's Tour on NxN board
/// </summary>
public class Program
{
    static readonly Action<object> Write = Console.Write;
    static readonly Func<ConsoleKeyInfo> Read = Console.ReadKey;

    // Composition root
    static void Main(string[] args)
    {        
        // Use timer for checking algo time
        var sw = new Stopwatch();
        sw.Start();

        // Init board and set size
        var board = new Board(5, Write);

        // Init config
        var config = new Config
        {
            MaxSolutions = 1,
            ShowBoardProgress = false,
            ShowStatus = true,
            DetectCodeError = false
        };

        // Init data
        var P = new Data(config);

        // Init backtrack algo
        var bt = new Backtrack(board, Write, P);

        // Find solutions
        bt.Run();

        // Algo done, stop time
        sw.Stop();

        // Show results
        bt.Show();

        Write(string.Format("\nSec: {0}\nPress a key to exit\n",
            sw.ElapsedMilliseconds / 1000d));

        Read();
    }
}

class Backtrack
{
    readonly Board _board;
    readonly Action<object> _write;
    private readonly Data _p;    

    public Backtrack(Board board, Action<object> write, Data P)
    {
        _board = board;
        _write = write;        
        _p = P;
    }

    void UndoLastMove(Data P)
    {
        P.RemoveLast();

        // Debug
        if (!P.ExitBacktrack && P.Config.ShowBoardProgress)
        {
            _board.Print(P.Path);
        }
    }

    void UpdatePath(Data P, Vector c)
    {
        if (c == null)
        {
            throw new ArgumentNullException("c");
        }

        Cell last = P.Path.Last();
        Cell visit = last.AddVector(c);

        P.Add(visit);

        // Debug
        if (!P.ExitBacktrack && P.Config.ShowBoardProgress)
        {
            _board.Print(P.Path);
        }
    }

    /*     
    wikipedia:
    http://en.wikipedia.org/wiki/Backtracking
     
    procedure bt(P,c)
        if reject(P,c) then return        
        if accept(P,c) then output(P,c)                                  
        s ← first(P,c)
        while s ≠ Λ do
            bt(P,s)
            s ← next(P,s)
        end while
      
        // backtrack starts here 
     end procedure
    */

    // O(?)
    void Bt(Data P, Vector c)
    {
        if (Reject(P, c)) { return; }

        // Add path data
        UpdatePath(P, c);

        // Debug
        DebugEpoch(P, c);

        if (Accept(P)) { SaveResult(P); }

        var s = First(P);

        while (s != null)
        {
            Bt(P, s);
            s = Next(P, s);
        }

        // Leaf, dead end, backtrack, roll back path data
        UndoLastMove(P);
    }

    // O(1)
    bool Accept(Data P)
    {
        // Accept condition
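        // A complete tour has visited every one of the Size*Size cells exactly once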
        if (P.Path.Count >= _board.Size * _board.Size)
        {
            return true;
        }
        return false;
    }

    // O(1)
    bool Reject(Data P, Vector c)
    {
        // Custom early exit
        if (P.ExitBacktrack)
        {
            return true;
        }

        if (c == null)
        {
            return true;
        }

        Cell last = P.Path.Last(); // last visit
        Cell candidate = last.AddVector(c); // consider candidate
        if (P.Visited.Contains(candidate))
        {
            return true;
        }

        return !_board.IsInsideBoard(candidate);
    }

    void DebugEpoch(Data P, Vector c)
    {
        if (P.ExitBacktrack)
        {
            return;
        }

        // Debug
        if (P.Config.ShowStatus && P.PathTried.Count % 200000 == 0)
        {
            _write(string.Format("PathTried: {0:n0}, Solutions: {1}\n",
                P.PathTried.Count, P.Solutions.Count));
        }
    }

    public void Run()
    {
        Cell init = new Cell(0, 0); // init pos
        _p.Add(init);

        Vector first = First(_p);
        Bt(_p, first);
    }

    public void Show()
    {
        _write("\n-----------------\n");
        foreach (var s in _p.Solutions)
        {
            _board.Print(s);
            _write("-----------------\n");
        }
        _write(string.Format("\nPaths tried: {0:n0}\n", _p.PathTried.Count));
        _write(string.Format("\nSolutions found: {0}\n", _p.Solutions.Count));
    }

    // O(n) where n is path length
    void SaveResult(Data P)
    {
        P.Solutions.Add(P.Path.ToList());
        if (P.Solutions.Count >= P.Config.MaxSolutions)
        {
            P.ExitBacktrack = true;
        }
    }

    // First valid child from path
    // O(1)
    Vector First(Data P)
    {
        Cell last = P.Path.Last();
        IList<Vector> moves = _board.GetValidMoves(last);

        return moves.FirstOrDefault(v => !P.Visited.Contains(last.AddVector(v)));
    }

    // Next sibling from current leaf
    // O(1)
    Vector Next(Data P, Vector c)
    {
        return _board.Next(c);
    }
}

class XY
{
    public int X { get; set; }
    public int Y { get; set; }

    public XY(int x, int y)
    {
        X = x;
        Y = y;
    }

    public override int GetHashCode()
    {
        return ToString().GetHashCode();
    }
    public override string ToString()
    {
        return string.Format("[{0},{1}]", X, Y);
    }
    public override bool Equals(object obj)
    {
        var other = obj as XY;
        if (other == null) { return false; }

        // Compare coordinates directly; equal hash codes alone do not guarantee equality
        return X == other.X && Y == other.Y;
    }
}

// Position on board
class Cell : XY
{
    public Cell(int x, int y) : base(x, y) { }
}

// Move definition
class Vector : XY
{
    private static int _count;
    public int Id { get; private set; }
    public Vector(int x, int y)
        : base(x, y)
    {
        Id = _count++;
    }
}

class Board
{
    // Define move of Knight
    private static readonly Vector[] Moves = new[] { 
        new Vector(2, 1), 
        new Vector(1, 2),
        new Vector(-1, 2), 
        new Vector(-2, 1),
        new Vector(-2, -1), 
        new Vector(-1, -2),
        new Vector(1, -2),
        new Vector(2, -1)
    };

    private void CleanBoard()
    {
        // clear all cells
        foreach (var i in _board)
        {
            for (var j = 0; j < i.Length; j++)
            {
                i[j] = "--";
            }
        }
    }

    // Updating display data
    private void UpdateBoard(IList<Cell> path)
    {
        CleanBoard();
        for (var i = 0; i < path.Count; i++)
        {
            Cell p = path[i];
            _board[p.Y][p.X] = i.ToString("00");
        }
    }

    // Next movement
    public Vector Next(Vector m)
    {
        if (m.Id >= Moves.Length - 1)
        {
            return null;
        }
        return Moves[m.Id + 1];
    }

    public int Size { get; private set; }
    private readonly string[][] _board;
    private readonly Action<object> _write;
    public Board(int size, Action<object> write)
    {
        Size = size;
        _write = write;

        // Board is only used for printing display
        _board = new string[Size][];

        for (var i = 0; i < _board.Length; i++)
        {
            _board[i] = new string[Size];
        }
    }

    // O(1) because Moves.Count is always 8
    // Get valid moves within board
    public List<Vector> GetValidMoves(Cell c)
    {
        return Moves.Where(a => IsInsideBoard(c.AddVector(a))).ToList();
    }

    // O(1)
    public bool IsInsideBoard(Cell c)
    {
        return c.X >= 0 && c.X < Size && c.Y >= 0 && c.Y < Size;
    }

    public void Print(List<Cell> p)
    {
        UpdateBoard(p);

        _write("\n");
        foreach (var y in _board)
        {
            foreach (var x in y)
            {
                _write(x + " ");
            }
            _write("\n\n");
        }
    }
}

class Data
{
    public bool ExitBacktrack { get; set; }
    public Config Config;

    public readonly List<Cell> Path = new List<Cell>();
    public readonly HashSet<Cell> Visited = new HashSet<Cell>();
    public readonly List<List<Cell>> Solutions = new List<List<Cell>>();
    public readonly HashSet<string> PathTried = new HashSet<string>();

    public Data(Config config)
    {
        Config = config;
    }

    // O(1)
    public void RemoveLast()
    {
        Cell cell = Path.Last(); // O(1)
        if (Config.DetectCodeError && !Visited.Contains(cell))
        {
            throw new ApplicationException("Can't remove not added cell");
        }
        Visited.Remove(cell); // O(1)
        Path.RemoveAt(Path.Count - 1); // O(1) remove last
    }

    // O(1) or O(n)
    public void Add(Cell cell)
    {
        if (Config.DetectCodeError && Visited.Contains(cell))
        {
            throw new ApplicationException("Can't add duplicate visited cell");
        }
        Visited.Add(cell);
        Path.Add(cell);

        // O(n)
        // Catch error in code, but expensive to use in running time
        if (Config.DetectCodeError)
        {
            var path = Path.GetString();
            if (PathTried.Contains(path))
            {
                throw new Exception("Error, path already tried");
            }
            PathTried.Add(path);
        }
        //O(1)
        else
        {
            PathTried.Add((PathTried.Count + 1).ToString());
        }

    }
}

class Config
{
    public int MaxSolutions { get; set; }
    public bool ShowBoardProgress { get; set; }
    public bool ShowStatus { get; set; }
    public bool DetectCodeError { get; set; } // cost running time if true
}

static class Extensions
{
    public static string GetString<T>(this IEnumerable<T> arr)
    {
        return arr.Aggregate("", (a, b) => a + "->" + b);
    }
    public static Cell AddVector(this Cell c, Vector v)
    {
        return new Cell(c.X + v.X, c.Y + v.Y);
    }
}

Examples:

Knight’s Tour on a 5 x 5 board

00 13 18 07 24

05 08 01 12 17

14 19 06 23 02

09 04 21 16 11

20 15 10 03 22
-----------------

Paths tried: 8,840
Sec: 0.202

Below 1 sec. for the first result.

Knight’s Tour on a 6 x 6 board

00 33 16 31 22 35

15 24 01 34 17 30

06 11 32 23 02 21

25 14 07 20 29 18

10 05 12 27 08 03

13 26 09 04 19 28
-----------------

Paths tried: 248,169
Sec: 6.024

About 6 seconds for the first result.

Knight’s Tour on a 7 x 7 board

00 31 38 27 40 23 48

37 28 01 24 21 26 41

30 19 32 39 02 47 22

07 36 29 20 25 42 03

18 15 08 33 44 11 46

35 06 13 16 09 04 43

14 17 34 05 12 45 10
-----------------

Paths tried: 7,151,179
Sec: 155.465

About 2.5 minutes for the first result.

Knight’s Tour on an 8 x 8 board

00 37 58 35 42 47 56 51

59 34 01 48 57 50 43 46

38 31 36 41 02 45 52 55

33 60 39 26 49 54 03 44

30 09 32 61 40 25 22 53

17 62 27 10 23 20 13 04

08 29 18 15 06 11 24 21

63 16 07 28 19 14 05 12
-----------------

Paths tried: 8,250,733
Sec: 186.499

About 3 minutes for the first result.

Written by kunuk Nykjaer

December 23, 2012 at 10:47 am

Posted in Algorithm, Csharp


Running time analysis – hashset used for running time optimization – C# example


Imagine you want to find the distinct numbers that appear in both of two lists,
where the lists have unknown length and unknown content.
You want to do it as fast as possible.

e.g.
List a = [5,2,2,6,4,3,3]
List b = [2,2,2,1,3,4,3,5]
You should find [2,3,4,5]

How would you do it?

Would you use the usual nested for-loop iteration?
If you don’t mind using some extra memory you could use the hashset trick.
See the code below for the examples.

If you are worried about memory you could sort the lists
and iterate them, which takes O(n log n) + O(m log m) in total. That is not covered here.

The hashset technique takes O(n+m), which is faster.

If you don’t know what a hashset is, here are some wiki readings:
http://en.wikipedia.org/wiki/Hash_table
http://en.wikipedia.org/wiki/Set_(abstract_data_type)

The code shows two styles with different implementations: one that runs in O(n*m) and one that runs in O(n+m).
I also use C# Linq to show how it can perform badly when you don’t use it correctly (ver2 vs. ver4 in the code).

Source code and demo.
Program.cs

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

/// <summary>
/// Author: Kunuk Nykjaer
/// </summary>
public class Program
{
    const int Length = 1000;
    const int Range = 10000;
    static readonly Random Random = new Random();
    static readonly List<int> List1 = new List<int>();
    static readonly List<int> List2 = new List<int>();
    
    public static void Main(string[] args)
    {
        for (int i = 0; i < Length; i++)
            List1.Add(Random.Next(Range));
        for (int i = 0; i < Length; i++)
            List2.Add(Random.Next(Range));

        Console.WriteLine("n and m has length: {0}", Lengh);

        RunningTime1();
        RunningTime2();
        RunningTime3();
        RunningTime4();
        RunningTime5();

        Console.WriteLine("press.."); Console.ReadKey();
    }
      
    private static void RunningTime1()
    {
        Console.WriteLine("1.  O(n*m)  Classic nested for loops");
        var sw = new Stopwatch();
        sw.Start(); //O(1)
        var equals = new HashSet<int>();
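        // A HashSet result de-duplicates automatically (both lists contain repeats)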
        foreach (var i in List1) //O(n)
            foreach (var j in List2) //O(m)
                if (i == j) //O(1)
                    equals.Add(i); //O(1)

        sw.Stop();

        Console.WriteLine("Elapsed msec: {0}, equals len: {1}", 
            sw.ElapsedMilliseconds, equals.Count);
    }
    
    private static void RunningTime2()
    {
        Console.WriteLine("2.  O(n*m)  Linq version quck style");
        var sw = new Stopwatch();
        sw.Start();

        // from...select runs in O(n*m), Distinct adds O(n+m)
        var equals = (from i in List1 from j in List2 where i == j select i).
            Distinct().ToList();

        sw.Stop();

        Console.WriteLine("Elapsed msec: {0}, equals len: {1}", 
            sw.ElapsedMilliseconds, equals.Count);
    }
    
    private static void RunningTime3()
    {
        Console.WriteLine("3.  O(n+m)  Linq");        
        var sw = new Stopwatch();
        sw.Start();

        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)

        foreach (var i in List1) //O(n)
            set.Add(i); //O(1)

        foreach (int i in List2.Where(set.Contains)) //O(m)
            equals.Add(i); //O(1)

        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}", 
            sw.ElapsedMilliseconds, equals.Count);
    }
    
    private static void RunningTime4()
    {
        Console.WriteLine("4.  O(n+m) foreach loop");
        
        var sw = new Stopwatch();
        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)
        sw.Start();
        foreach (var i in List1) //O(n)
            set.Add(i);
        foreach (int i in List2) //O(m)
            if (set.Contains(i)) //O(1)
                equals.Add(i); //O(1)

        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}", 
            sw.ElapsedMilliseconds, equals.Count);
    }
    
    private static void RunningTime5()
    {
        Console.WriteLine("5.  O(n+m) for loop");
        
        var sw = new Stopwatch();
        sw.Start();

        var equals = new HashSet<int>(); //O(1)
        var set = new HashSet<int>(); //O(1)

        int count1 = List1.Count; //O(1)
        for (int i = 0; i < count1; i++) //O(n)
            set.Add(List1[i]); //O(1)

        int count2 = List2.Count; //O(1)
        for (int i = 0; i < count2; i++) //O(m)
        {
            int j = List2[i]; //O(1)
            if (set.Contains(j)) //O(1)
                equals.Add(j); //O(1)
        }
        
        sw.Stop();
        Console.WriteLine("Elapsed msec: {0}, equals len: {1}", 
            sw.ElapsedMilliseconds, equals.Count);
    }
}

O(n*m) and O(n+m)

n and m has length: 10000
1.  O(n*m)  Classic nested for loops
Elapsed msec: 1582, equals len: 900
2.  O(n*m)  Linq version quick style
Elapsed msec: 8446, equals len: 900
3.  O(n+m)  Linq
Elapsed msec: 8, equals len: 900
4.  O(n+m) foreach loop
Elapsed msec: 1, equals len: 900
5.  O(n+m) for loop
Elapsed msec: 1, equals len: 900

Ver2 is a bad implementation using Linq. Linq can cost you running time if you use it poorly.

O(n*m) and O(n+m)

n and m has length: 20000
1.  O(n*m)  Classic nested for loops
Elapsed msec: 6285, equals len: 3234
2.  O(n*m)  Linq version quick style
Elapsed msec: 34932, equals len: 3234
3.  O(n+m)  Linq
Elapsed msec: 10, equals len: 3234
4.  O(n+m) foreach loop
Elapsed msec: 4, equals len: 3234
5.  O(n+m) for loop
Elapsed msec: 3, equals len: 3234

The poor version takes about 35 seconds while the hashset versions take at most 10 msec.

Here is a test with larger lists; the O(n*m) versions were too slow to include.
O(n+m) with large list

n and m has length: 1000000
3.  O(n+m)  Linq
Elapsed msec: 505, equals len: 90594
4.  O(n+m) foreach loop
Elapsed msec: 537, equals len: 90594
5.  O(n+m) for loop
Elapsed msec: 499, equals len: 90594

Even for large lists, the time is below 1 sec.
For the nested loops it would have taken 12 days!
(source: Algorithm Design by Jon Kleinberg and Éva Tardos, chapter 2, O(n*n) where n = 1,000,000)

The lesson to take away is:
when you are about to write something using nested for loops, first check whether the lists are large; if they are and fast running time is important, consider using this trick.
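As a closing side note (not covered in the post itself): LINQ already ships this trick as Enumerable.Intersect, which builds a hash set internally, so it also runs in O(n+m) and returns the distinct common numbers directly.

var a = new List<int> { 5, 2, 2, 6, 4, 3, 3 };
var b = new List<int> { 2, 2, 2, 1, 3, 4, 3, 5 };

// Intersect builds a set from one sequence and streams the other: O(n+m)
var common = a.Intersect(b).ToList(); // 5, 2, 4, 3 (distinct, in first-seen order)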

Written by kunuk Nykjaer

March 4, 2012 at 12:52 am

Q-learning Library example with C#


This is a follow up of my previous post – Q-learning example with Java
https://kunuk.wordpress.com/2010/09/24/q-learning/

[Image: q-learn-bender]
Imagine you had a robot, just like Bender here, and you want it to learn to navigate correctly to the destination (the beer). Your possible moves are up, right, down or left from any room, and you want some kind of automatic system that gives you the solution. This is what this library can do for you. You configure the code with the possible states, the possible actions and the action outcomes. E.g. when you move right from the bedroom, the action result is that Bender has moved to the Hallway; if you move left from the bedroom, Bender hits the wall and stays in the bedroom. You will need to set up a reward value for when Bender reaches the beer, e.g. 100 points. That’s it!
No logic programming on your part, just some configuration.

DISCLAIMER:
There might be a bug in the implementation. Read the comments from Jens in the comments section for more info.

In my previous post I made a quick example in Java; this time I will use C#.
This version supports non-deterministic action outcomes, and I include examples of how to use this library. The code examples are included at the bottom. You need QLearning.cs to run these examples.

  • Path finding Bender
  • Path finding demo (same example as in my previous post)
  • Rock paper scissors demo
  • Path finding advanced demo

Path finding Bender example
Path finding Bender result:

** Q-Learning structure **
State Bedroom
  Action up
     ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
  Action right
     ActionResult State Hallway, Prob. 1, Reward 0, PrevState Bedroom, QE 81
  Action down
     ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
  Action left
     ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Bedroom, QE 72.9
State Hallway
  Action up
     ActionResult State Hallway, Prob. 1, Reward 0, PrevState Hallway, QE 81
  Action right
     ActionResult State Hallway, Prob. 1, Reward 0, PrevState Hallway, QE 81
  Action down
     ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Hallway, QE 90
  Action left
     ActionResult State Bedroom, Prob. 1, Reward 0, PrevState Hallway, QE 72.9
State Stairwell
  Action up
     ActionResult State Hallway, Prob. 1, Reward 0, PrevState Stairwell, QE 81
  Action right
     ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Stairwell, QE 90
  Action down
     ActionResult State Stairwell, Prob. 1, Reward 0, PrevState Stairwell, QE 90
  Action left
     ActionResult State Kitchen, Prob. 1, Reward 100, PrevState Stairwell, QE 100

** Show Policy **
From state Bedroom do action right, max QEstimated is 81
From state Hallway do action down, max QEstimated is 90
From state Stairwell do action left, max QEstimated is 100

Bender has learned the fastest way to get some beer. He knows which direction to take from any room.
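Those QE numbers follow directly from reward discounting. With a single reward of 100 for reaching the Kitchen and a discount factor Gamma = 0.9 (the value the other demos below set explicitly; I assume the Bender demo uses the same default), an action whose outcome still leaves Bender k moves from the beer converges to QE = 100 * 0.9^k, which is exactly the 100, 90, 81 and 72.9 printed above. The update that produces these values is the standard Q-learning rule (assuming the library implements vanilla Q-learning):

Q(s,a) <- Q(s,a) + Alpha * (reward + Gamma * max_a' Q(s',a') - Q(s,a))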

Path finding example
[Image: q-learn1]

Path finding result:

** Q-Learning structure **
State A
  Action from_A_to_B
     ActionResult State B, Prob. 1, Reward 0, PrevState A, QE 90
  Action from_A_to_D
     ActionResult State D, Prob. 1, Reward 0, PrevState A, QE 72.9
State B
  Action from_B_to_A
     ActionResult State A, Prob. 1, Reward 0, PrevState B, QE 81
  Action from_B_to_C
     ActionResult State C, Prob. 1, Reward 100, PrevState B, QE 100
  Action from_B_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState B, QE 81
State C
  Action from_C_to_C
     ActionResult State C, Prob. 1, Reward 0, PrevState C, QE 0
State D
  Action from_D_to_A
     ActionResult State A, Prob. 1, Reward 0, PrevState D, QE 81
  Action from_D_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState D, QE 81
State E
  Action from_E_to_B
     ActionResult State B, Prob. 1, Reward 0, PrevState E, QE 90
  Action from_E_to_D
     ActionResult State D, Prob. 1, Reward 0, PrevState E, QE 72.9
  Action from_E_to_F
     ActionResult State F, Prob. 1, Reward 0, PrevState E, QE 90
State F
  Action from_F_to_C
     ActionResult State C, Prob. 1, Reward 100, PrevState F, QE 100
  Action from_F_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState F, QE 81

** Show Policy **
From state A do action from_A_to_B, max QEstimated is 90
From state B do action from_B_to_C, max QEstimated is 100
From state C do action from_C_to_C, max QEstimated is 0
From state D do action from_D_to_A, max QEstimated is 81
From state E do action from_E_to_B, max QEstimated is 90
From state F do action from_F_to_C, max QEstimated is 100

The results are exactly the same as in my previous post.

[Image: rock-paper-scissors]

Rock paper scissors result:

** Q-Learning structure **
State Begin
  Action from_Begin_to_Rock
     ActionResult State Rock, Prob. 0.2, Reward 0, PrevState Begin, QE 0
     ActionResult State Paper, Prob. 0.5, Reward -10, PrevState Begin, QE -0.91
     ActionResult State Scissor, Prob. 0.3, Reward 100, PrevState Begin, QE 4.11

  Action from_Begin_to_Paper
     ActionResult State Rock, Prob. 0.2, Reward 100, PrevState Begin, QE 2.44
     ActionResult State Paper, Prob. 0.5, Reward 0, PrevState Begin, QE 0
     ActionResult State Scissor, Prob. 0.3, Reward -10, PrevState Begin, QE -0.41
  Action from_Begin_to_Scissor
     ActionResult State Rock, Prob. 0.2, Reward -10, PrevState Begin, QE -0.24
     ActionResult State Paper, Prob. 0.5, Reward 100, PrevState Begin, QE 9.09
     ActionResult State Scissor, Prob. 0.3, Reward 0, PrevState Begin, QE 0

** Show Policy **
From state Begin do action from_Begin_to_Scissor, max QEstimated is 9.09

** Opponent style **
style is rock 0.2 paper 0.5 scissor 0.3

Here the opponent’s style is to pick paper 50% of the time. The robot correctly learns to pick scissors as the action policy for maximizing the reward: the expected reward for scissors is 0.2*(-10) + 0.5*100 + 0.3*0 = 48, higher than for rock (25) or paper (17).

Path finding advanced example
[Image: q-learn-adv]

In the advanced version of path finding, there is a hill from state B to C.
Whenever the robot tries to climb the hill from state B to C, there is a 10% chance of success and a 90% chance it will slide back to state A.

Path finding advanced example result:

** Q-Learning structure **
State A
  Action from_A_to_B
     ActionResult State B, Prob. 1, Reward 0, PrevState A, QE 72.9
  Action from_A_to_D
     ActionResult State D, Prob. 1, Reward 0, PrevState A, QE 72.9
State B
  Action from_B_to_A
     ActionResult State A, Prob. 1, Reward 0, PrevState B, QE 65.61
  Action from_B_to_C
     ActionResult State C, Prob. 0.1, Reward 100, PrevState B, QE 1.1
     ActionResult State A, Prob. 0.9, Reward 0, PrevState B, QE 31.08
  Action from_B_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState B, QE 81
State C
  Action from_C_to_C
     ActionResult State C, Prob. 1, Reward 0, PrevState C, QE 0
State D
  Action from_D_to_A
     ActionResult State A, Prob. 1, Reward 0, PrevState D, QE 65.61
  Action from_D_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState D, QE 81
State E
  Action from_E_to_B
     ActionResult State B, Prob. 1, Reward 0, PrevState E, QE 72.9
  Action from_E_to_D
     ActionResult State D, Prob. 1, Reward 0, PrevState E, QE 72.9
  Action from_E_to_F
     ActionResult State F, Prob. 1, Reward 0, PrevState E, QE 90
State F
  Action from_F_to_C
     ActionResult State C, Prob. 1, Reward 100, PrevState F, QE 100
  Action from_F_to_E
     ActionResult State E, Prob. 1, Reward 0, PrevState F, QE 81

** Show Policy **
From state A do action from_A_to_B, max QEstimated is 72.9
From state B do action from_B_to_E, max QEstimated is 81
From state C do action from_C_to_C, max QEstimated is 0
From state D do action from_D_to_E, max QEstimated is 81
From state E do action from_E_to_F, max QEstimated is 90
From state F do action from_F_to_C, max QEstimated is 100

The robot has learned to take the action ‘go to state E’ instead of ‘go to state C’ when it is in state B.
A 10% chance of success is not good enough, and the robot wisely chooses a longer but more rewarding path: with Gamma = 0.9, the sure detour B→E→F→C is worth 0.9 * 90 = 81 from B, which is exactly the QEstimated the policy reports.

The data structure is as follows:
the QLearning algorithm has multiple states, every state has multiple possible actions, and each action has multiple possible action outcomes.
[Image: q-learn]
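A minimal sketch of that one-to-many shape (simplified, hypothetical class and field names; the real classes live in QLearning.cs below and assume using System.Collections.Generic):

// Sketch only: one QLearning -> many states -> many actions -> many outcomes
class QLearningSketch
{
    public List<QStateSketch> States = new List<QStateSketch>();
}
class QStateSketch
{
    public string StateName;
    public List<QActionSketch> Actions = new List<QActionSketch>();
}
class QActionSketch
{
    public string ActionName;
    public List<QActionResultSketch> ActionResults = new List<QActionResultSketch>();
}
class QActionResultSketch
{
    public string NextStateName; // where this outcome lands
    public double Probability;   // chance of this outcome occurring
    public double Reward;        // immediate reward for this outcome
    public double QEstimated;    // learned Q-value
}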

PathFindingBenderDemo.cs

using System;
using QLearningFramework;

namespace ConsoleQLearning
{
    class PathFindingBenderDemo
    {
        // ----------- Insert the state names here -----------
        internal enum StateNameEnum
        {
            Bedroom, Hallway, Stairwell, Kitchen
        }
        // ----------- End Insert the state names here -------

        static void Main(string[] args)
        {
            DateTime starttime = DateTime.Now;

            PathFinding();            

            double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
            Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty()); Console.ReadKey();
        }         

        static void PathFinding()
        {
            QLearning q = new QLearning();
            QAction fromTo;
            QState state;
            string stateName;
            string stateNameNext;

            // ----------- Begin Insert the path setup here -----------
            // insert the end states here, e.g. goal state
            q.EndStates.Add(StateNameEnum.Kitchen.EnumToString()); 

            // State Bedroom           
            stateName = StateNameEnum.Bedroom.EnumToString();
            q.AddState(state = new QState(stateName, q));
            // action up
            stateNameNext = StateNameEnum.Bedroom.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action right
            stateNameNext = StateNameEnum.Hallway.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action down
            stateNameNext = StateNameEnum.Bedroom.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action left
            stateNameNext = StateNameEnum.Bedroom.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State Hallway           
            stateName = StateNameEnum.Hallway.EnumToString();
            q.AddState(state = new QState(stateName, q));
            // action up
            stateNameNext = StateNameEnum.Hallway.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action right
            stateNameNext = StateNameEnum.Hallway.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action down
            stateNameNext = StateNameEnum.Stairwell.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action left
            stateNameNext = StateNameEnum.Bedroom.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State Stairwell           
            stateName = StateNameEnum.Stairwell.EnumToString();
            q.AddState(state = new QState(stateName, q));
            // action up
            stateNameNext = StateNameEnum.Hallway.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("up")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action right
            stateNameNext = StateNameEnum.Stairwell.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("right")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action down
            stateNameNext = StateNameEnum.Stairwell.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("down")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action left
            stateNameNext = StateNameEnum.Kitchen.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName("left")));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0){Reward = 100});            
            // ----------- End Insert the path setup here -----------

            q.RunTraining();
            q.PrintQLearningStructure();
            q.ShowPolicy();
        }
    }
}

PathFindingDemo.cs

using System;
using QLearningFramework;

namespace ConsoleQLearning
{
    class PathFindingDemo
    {
        // ----------- Insert the state names here -----------
        internal enum StateNameEnum
        {
            A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, 
            V, W, X, Y, Z
        }
        // ----------- End Insert the state names here -------

        static void Main(string[] args)
        {
            DateTime starttime = DateTime.Now;

            PathFinding();            

            double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
            Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty()); Console.ReadKey();
        }         

        static void PathFinding()
        {
            QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9, 
                MaxExploreStepsWithinOneEpisode = 1000 };

            QAction fromTo;
            QState state;
            string stateName;
            string stateNameNext;

            // ----------- Begin Insert the path setup here -----------
            // insert the end states here, e.g. goal state
            q.EndStates.Add(StateNameEnum.C.EnumToString()); 


            // State A           
            stateName = StateNameEnum.A.EnumToString();
            q.AddState(state = new QState(stateName, q));
            // action A -> B
            stateNameNext = StateNameEnum.B.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext) ));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action A -> D
            stateNameNext = StateNameEnum.D.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State B
            stateName = StateNameEnum.B.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // B -> A
            stateNameNext = StateNameEnum.A.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // B -> C
            stateNameNext = StateNameEnum.C.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
            // B -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State C
            stateName = StateNameEnum.C.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // C -> C
            stateNameNext = stateName;
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State D
            stateName = StateNameEnum.D.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // D -> A
            stateNameNext = StateNameEnum.A.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // D -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State E
            stateName = StateNameEnum.E.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // E -> B
            stateNameNext = StateNameEnum.B.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // E -> D
            stateNameNext = StateNameEnum.D.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // E -> F
            stateNameNext = StateNameEnum.F.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State F
            stateName = StateNameEnum.F.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // F -> C
            stateNameNext = StateNameEnum.C.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
            // F -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // ----------- End Insert the path setup here -----------

            q.RunTraining();
            q.PrintQLearningStructure();
            q.ShowPolicy();
        }
    }
}

PathFindingAdvDemo.cs

using System;
using QLearningFramework;

namespace ConsoleQLearning
{
    class PathFindingAdvDemo
    {
        // ----------- Insert the state names here -----------
        internal enum StateNameEnum
        {
            A, B, C, D, E, F
        }
        // ----------- End Insert the state names here -------

        static void Main(string[] args)
        {
            DateTime starttime = DateTime.Now;

            PathFinding();            

            double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
            Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty()); Console.ReadKey();
        }         

        static void PathFinding()
        {
            QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9, 
                MaxExploreStepsWithinOneEpisode = 1000 };

            QAction fromTo;
            QState state;
            string stateName;
            string stateNameNext;

            // ----------- Begin Insert the path setup here -----------
            // insert the end states here, e.g. goal state
            q.EndStates.Add(StateNameEnum.C.EnumToString()); 


            // State A           
            stateName = StateNameEnum.A.EnumToString();
            q.AddState(state = new QState(stateName, q));
            // action A -> B
            stateNameNext = StateNameEnum.B.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext) ));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // action A -> D
            stateNameNext = StateNameEnum.D.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State B
            stateName = StateNameEnum.B.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // B -> A
            stateNameNext = StateNameEnum.A.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // B -> C
            stateNameNext = StateNameEnum.C.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            // action outcome probability
            fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.C.EnumToString(), 0.1) { Reward = 100 });
            fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.A.EnumToString(), 0.9));
            // B -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State C
            stateName = StateNameEnum.C.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // C -> C
            stateNameNext = stateName;
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State D
            stateName = StateNameEnum.D.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // D -> A
            stateNameNext = StateNameEnum.A.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // D -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State E
            stateName = StateNameEnum.E.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // E -> B
            stateNameNext = StateNameEnum.B.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // E -> D
            stateNameNext = StateNameEnum.D.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));
            // E -> F
            stateNameNext = StateNameEnum.F.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // State F
            stateName = StateNameEnum.F.EnumToString();
            q.States.Add(state = new QState(stateName, q));
            // F -> C
            stateNameNext = StateNameEnum.C.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0) { Reward = 100 });
            // F -> E
            stateNameNext = StateNameEnum.E.EnumToString();
            state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
            fromTo.AddActionResult(new QActionResult(fromTo, stateNameNext, 1.0));

            // ----------- End Insert the path setup here -----------

            q.RunTraining();
            q.PrintQLearningStructure();
            q.ShowPolicy();
        }
    }
}

RockPaperScissorsDemo.cs

using System;
using System.Collections.Generic;
using QLearningFramework;

namespace ConsoleQLearning
{
    class RockPaperScissorsDemo
    {
        // ----------- Insert the state names here -----------
        internal enum StateNameEnum
        {
            Begin, Rock, Paper, Scissor
        }
        // ----------- End Insert the state names here -------

        static void Main(string[] args)
        {
            DateTime starttime = DateTime.Now;
            
            RockPaperScissors();

            double timespend = DateTime.Now.Subtract(starttime).TotalSeconds;
            Console.WriteLine("\n{0} sec. press a key ...", timespend.Pretty()); Console.ReadKey();
        }

         static void RockPaperScissors()
         {
             QLearning q = new QLearning { Episodes = 1000, Alpha = 0.1, Gamma = 0.9, 
                 MaxExploreStepsWithinOneEpisode = 1000 };            

             var opponentStyles = new List<double[]>();
             // rock paper scissor probability styles
             opponentStyles.Add(new []{ 0.33,0.33,0.33} );
             opponentStyles.Add(new [] { 0.5, 0.3, 0.2 });
             opponentStyles.Add(new [] { 0.2, 0.5, 0.3 });
             opponentStyles.Add(new[] { 0.1, 0.1, 0.8 });
             int index = new Random().Next(opponentStyles.Count);
             var opponent = opponentStyles[index];

             // opponent probability pick 
             double rockOpponent = opponent[0];
             double paperOpponent = opponent[1];
             double scissorOpponent = opponent[2];

             QAction fromTo;
             QState state;
             string stateName;
             string stateNameNext;

             // ----------- Begin Insert the path setup here -----------

             // insert the end states here
             q.EndStates.Add(StateNameEnum.Rock.EnumToString());
             q.EndStates.Add(StateNameEnum.Paper.EnumToString());
             q.EndStates.Add(StateNameEnum.Scissor.EnumToString());

             // State Begin
             stateName = StateNameEnum.Begin.EnumToString();
             q.AddState(state = new QState(stateName, q));             
             
             // action Rock
             stateNameNext = StateNameEnum.Rock.EnumToString();
             state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));             
             // action outcome probability
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Rock.EnumToString(), 
                 rockOpponent) { Reward = 0 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Paper.EnumToString(), 
                 paperOpponent) { Reward = -10 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Scissor.EnumToString(), 
                 scissorOpponent) { Reward = 100 });             
             
             // action Paper
             stateNameNext = StateNameEnum.Paper.EnumToString();
             state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
             // action outcome probability
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Rock.EnumToString(), 
                 rockOpponent) { Reward = 100 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Paper.EnumToString(), 
                 paperOpponent) { Reward = 0 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Scissor.EnumToString(), 
                 scissorOpponent) { Reward = -10 });

             // action Scissor
             stateNameNext = StateNameEnum.Scissor.EnumToString();
             state.AddAction(fromTo = new QAction(stateName, new QActionName(stateName, stateNameNext)));
             // action outcome probability
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Rock.EnumToString(), 
                 rockOpponent) { Reward = -10 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Paper.EnumToString(), 
                 paperOpponent) { Reward = 100 });
             fromTo.AddActionResult(new QActionResult(fromTo, StateNameEnum.Scissor.EnumToString(), 
                 scissorOpponent) { Reward = 0 });
             // ----------- End Insert the path setup here -----------

             q.RunTraining();
             q.PrintQLearningStructure();
             q.ShowPolicy();

             Console.WriteLine("\n** Opponent style **");
             Console.WriteLine(string.Format("style is rock {0} paper {1} scissor {2}", 
                 opponent[0].Pretty(), opponent[1].Pretty(), opponent[2].Pretty()));
         }        
    }
}
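
As a quick sanity check of the demo above (this helper is my own addition, not part of the demo code), the expected immediate reward of each opening action can be computed by hand from the reward values in the setup and one of the opponent styles. The learned policy should favor the action with the highest expected reward:

// Hypothetical sanity check using the rewards from the demo above
double[] style = { 0.1, 0.1, 0.8 }; // opponent picks rock, paper, scissor
double rock = style[0] * 0 + style[1] * -10 + style[2] * 100;    // 0.1*0 + 0.1*-10 + 0.8*100 = 79
double paper = style[0] * 100 + style[1] * 0 + style[2] * -10;   // 0.1*100 + 0.1*0 + 0.8*-10 = 2
double scissor = style[0] * -10 + style[1] * 100 + style[2] * 0; // 0.1*-10 + 0.1*100 + 0.8*0 = 9
Console.WriteLine("E[rock]={0} E[paper]={1} E[scissor]={2}", rock, paper, scissor);

Against this scissor-heavy style, Rock has by far the highest expected reward (79 versus 2 and 9), so that is the opening action the trained policy should converge to.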

QLearning.cs

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace QLearningFramework
{  
    /// <summary>
    /// Version 0.1
    /// Author: Kunuk Nykjaer
    /// One-to-many relationship data structure:
    /// QLearning [1 -- *] State [1 -- *] Action [1 -- *] ActionResult
    /// </summary>
    class QLearning
    {
        public List<QState> States { get; private set; }
        public Dictionary<string, QState> StateLookup { get; private set; }

        public double Alpha { get; internal set; }
        public double Gamma { get; internal set; }

        public HashSet<string> EndStates { get; private set; }
        public int MaxExploreStepsWithinOneEpisode { get; internal set; } // avoid infinite loops
        public bool ShowWarning { get; internal set; } // show runtime warnings regarding Q-learning
        public int Episodes { get; internal set; }

        public QLearning()
        {
            States = new List<QState>();
            StateLookup = new Dictionary<string, QState>();
            EndStates = new HashSet<string>();

            // Default when not set
            MaxExploreStepsWithinOneEpisode = 1000;
            Episodes = 1000;
            Alpha = 0.1;
            Gamma = 0.9;
            ShowWarning = true;
        }

        public void AddState(QState state)
        {
            States.Add(state);
        }

        public void RunTraining()
        {
            QMethod.Validate(this);

            /*
            For each episode: select a random initial state
            Do while the goal state is not reached:
                Select one among all possible actions for the current state
                Using this possible action, consider going to the next state
                Get the maximum Q value of this next state based on all possible actions
                Set the next state as the current state
            */

            // For each episode
            var rand = new Random();
            long maxloopEventCount = 0;

            // Train episodes
            for (long i = 0; i < Episodes; i++)
            {
                long maxloop = 0;
                // Select random initial state			
                int stateIndex = rand.Next(States.Count);
                QState state = States[stateIndex];
                QAction action = null;
                do
                {
                    if (++maxloop > MaxExploreStepsWithinOneEpisode)
                    {
                        if (ShowWarning)
                        {
                            string msg = string.Format(
                                "{0} !! MAXLOOP state: {1} action: {2}, maybe your path setup is wrong or the end state is too difficult to reach",
                                ++maxloopEventCount, state, action);
                            QMethod.Log(msg);
                        }
                        
                        break;
                    }

                    // no actions from this state, end this episode
                    if (state.Actions.Count == 0)
                        break;

                    // Selection strategy: pick an action uniformly at random (pure exploration)
                    int index = rand.Next(state.Actions.Count);
                    action = state.Actions[index];

                    // Using this possible action, consider going to the next state
                    // Pick a random action outcome
                    QActionResult nextStateResult = action.PickActionByProbability();
                    string nextStateName = nextStateResult.StateName;

                    double q = nextStateResult.QEstimated;
                    double r = nextStateResult.Reward;
                    double maxQ = MaxQ(nextStateName);

                    // Q(s,a)= Q(s,a) + alpha * (R(s,a) + gamma * Max(next state, all actions) - Q(s,a))
                    double value = q + Alpha * (r + Gamma * maxQ - q); // q-learning                  
                    nextStateResult.QValue = value; // update
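                    // Note: the estimate read above is QEstimated (QValue * Probability),
                    // while the learned value is stored back into QValue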

                    // if an end state is reached, go to the next episode
                    if (EndStates.Contains(nextStateResult.StateName))
                        break;

                    // Set the next state as the current state                    
                    state = StateLookup[nextStateResult.StateName];

                } while (true);
            }
        }


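        // Returns the largest QEstimated over all action outcomes reachable
        // from the given state, or 0 if the state is unknown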
        double MaxQ(string stateName)
        {
            const double defaultValue = 0;

            if(!StateLookup.ContainsKey(stateName))            
                return defaultValue;                            

            QState state = StateLookup[stateName];
            var actionsFromState = state.Actions;
            double? maxValue = null;
            foreach (var action in actionsFromState)
            {
                foreach (var actionResult in action.ActionsResult)
                {
                    double value = actionResult.QEstimated;
                    if (value > maxValue || !maxValue.HasValue)
                        maxValue = value;
                }
            }
            }

            // no update
            if (!maxValue.HasValue && ShowWarning) 
                QMethod.Log(string.Format("Warning: No MaxQ value for stateName {0}",
                    stateName) );

            return maxValue.HasValue ? maxValue.Value : defaultValue;
        }

        public void PrintQLearningStructure()
        {
            Console.WriteLine("** Q-Learning structure **");
            foreach (QState state in States)
            {
                Console.WriteLine("State {0}", state.StateName);
                foreach (QAction action in state.Actions)
                {
                    Console.WriteLine("  Action " + action.ActionName);
                    Console.Write(action.GetActionResults());
                }
            }
            Console.WriteLine();
        }

        public void ShowPolicy()
        {
            Console.WriteLine("** Show Policy **");
            foreach (QState state in States)
            {
                double max = Double.MinValue;
                string actionName = "nothing";
                foreach (QAction action in state.Actions)
                {
                    foreach (QActionResult actionResult in action.ActionsResult)
                    {
                        if (actionResult.QEstimated > max)
                        {
                            max = actionResult.QEstimated;
                            actionName = action.ActionName.ToString();
                        }
                    }
                }

                Console.WriteLine(string.Format("From state {0} do action {1}, max QEstimated is {2}",
                    state.StateName, actionName, max.Pretty()));
            }
        }
    }

    class QState
    {        
        public string StateName { get; private set; }
        public List<QAction> Actions { get; private set; }

        public void AddAction(QAction action)
        {
            Actions.Add(action);
        }

        public QState(string stateName, QLearning q)
        {            
            q.StateLookup.Add(stateName, this);
            StateName = stateName;
            Actions = new List<QAction>();
        }

        public override string ToString()
        {
            return string.Format("StateName {0}", StateName);
        }
    }

    class QAction
    {
        private static readonly Random Rand = new Random();
        public QActionName ActionName { get; internal set; }
        public string CurrentState { get; private set; }
        public List<QActionResult> ActionsResult { get; private set; }

        public void AddActionResult(QActionResult actionResult)
        {
            ActionsResult.Add(actionResult);
        }

        public string GetActionResults()
        {
            var sb = new StringBuilder();
            foreach (QActionResult actionResult in ActionsResult)
                sb.AppendLine("     ActionResult " + actionResult);

            return sb.ToString();
        }

        public QAction(string currentState, QActionName actionName = null)
        {
            CurrentState = currentState;
            ActionsResult = new List<QActionResult>();
            ActionName = actionName;
        }

        // The probabilities of an action's outcomes must sum to approximately 1
        public void ValidateActionsResultProbability()
        {
            const double epsilon = 0.1;

            if (ActionsResult.Count == 0)
                throw new ApplicationException(string.Format(
                    "ValidateActionsResultProbability is invalid, no action results:\n {0}", 
                    this));

            double sum = ActionsResult.Sum(a => a.Probability);
            if (Math.Abs(1 - sum) > epsilon)
                throw new ApplicationException(string.Format(
                    "ValidateActionsResultProbability is invalid:\n {0}", this));
        }

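        // Roulette-wheel selection: picks one outcome at random, weighted by
        // the outcome probabilities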
        public QActionResult PickActionByProbability()
        {
            double d = Rand.NextDouble();
            double sum = 0;
            foreach (QActionResult actionResult in ActionsResult)
            {
                sum += actionResult.Probability;
                if (d <= sum)
                    return actionResult;
            }
            
            // we might get here if sum probability is below 1.0 e.g. 0.99 
            // and the d random value is 0.999
            if (ActionsResult.Count > 0) 
                return ActionsResult.Last();

            throw new ApplicationException(string.Format("No PickAction result: {0}", this));
        }

        public override string ToString()
        {
            double sum = ActionsResult.Sum(a => a.Probability);
            return string.Format("ActionName {0} probability sum: {1} actionResultCount {2}",
                ActionName, sum, ActionsResult.Count);
        }
    }

    class QActionResult
    {
        public string StateName { get; internal set; }
        public string PrevStateName { get; internal set; }
        public double QValue { get; internal set; } // Q value is stored here        
        public double Probability { get; internal set; }
        public double Reward { get; internal set; }

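        // Probability-weighted Q value; this is the estimate used by
        // RunTraining, MaxQ and ShowPolicy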
        public double QEstimated
        {
            get { return QValue * Probability; }
        }

        public QActionResult(QAction action, string stateNameNext = null, 
            double probability = 1, double reward = 0)
        {
            PrevStateName = action.CurrentState;
            StateName = stateNameNext;
            Probability = probability;
            Reward = reward;
        }

        public override string ToString()
        {
            return string.Format("State {0}, Prob. {1}, Reward {2}, PrevState {3}, QE {4}",
                StateName, Probability.Pretty(), Reward, PrevStateName, QEstimated.Pretty());
        }
    }

    public class QActionName
    {
        public string From { get; private set; }
        public string To { get; private set; }

        public QActionName(string from, string to = null)
        {
            From = from;
            To = to;
        }

        public override string ToString()
        {
            return GetActionName();
        }

        public string GetActionName()
        {
            if (To == null) 
                return From;
            return QMethod.ActionNameFromTo(From,To);
        }
    }

    static class QMethod
    {
        public static void Log(string s)
        {
            Console.WriteLine(s);
        }

        public static readonly CultureInfo CultureEnUs = new CultureInfo("en-US");

        public static string ToStringEnUs(this double d)
        {
            return d.ToString("G", CultureEnUs);
        }
        public static string Pretty(this double d)
        {
            return ToStringEnUs(Math.Round(d, 2));
        }

        public static string ActionNameFromTo(string a, string b)
        {
            return string.Format("from_{0}_to_{1}", a, b);
        }

        public static string EnumToString<T>(this T type)
        {
            return Enum.GetName(typeof(T), type);
        }

        public static void ValidateRange(double d, string origin = null)
        {
            if (d < 0 || d > 1)
            {
                string s = origin ?? string.Empty;
                throw new ApplicationException(string.Format("ValidateRange error: {0} {1}", d, s));
            }
        }

        public static void Validate(QLearning q)
        {
            foreach (var state in q.States)
            {
                foreach (var action in state.Actions)
                {
                    action.ValidateActionsResultProbability();
                }
            }
        }
    }
}

Currently you have to know all the action-outcome probability values in advance, because they are fixed when the Q-learning structure is set up. The source code can easily be adapted if the probabilities are only known at runtime: add a probability lookup function and evaluate it where the Q-learning formula is calculated; a rough sketch of the idea is shown below. Maybe I will look more into that at a later time.
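
The sketch (hypothetical, my own addition, not part of the framework above) backs the fixed Probability property with a delegate that is evaluated each time an outcome is picked or a Q value is estimated:

using System;

// Hypothetical variant of QActionResult with a runtime probability lookup.
// PickActionByProbability and QEstimated would then always see the current
// probability, e.g. taken from an observed frequency table.
class QActionResultDynamic
{
    public string StateName { get; internal set; }
    public double QValue { get; internal set; }
    public double Reward { get; internal set; }

    // Evaluated on every read, e.g. from observed opponent frequencies
    public Func<double> ProbabilityFunc { get; internal set; }

    public double Probability { get { return ProbabilityFunc(); } }
    public double QEstimated { get { return QValue * Probability; } }
}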

Written by Kunuk Nykjaer

January 14, 2012 at 7:53 pm