Windows thread pool keeps threads even after being closed

Windows Thread pool maintains threads so that it doesn’t have to create a new one when a work item is submitted. It may surprise you that it maintains threads even after you close the thread pool with CloseThreadpool() API function.

The following code demonstrates it. It starts 100 thread pool work items and monitors the number of threads in the process for the next 70 second. The thread pool object is closed immediately after posting the tasks.

LONG g_activeThreadCount = 0;

void CALLBACK WorkProc(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work)
{
    InterlockedIncrement(&g_activeThreadCount);
    Sleep(5000);
    InterlockedDecrement(&g_activeThreadCount);
}

void PostThreadpoolWorks(int numberOfThreadpoolWorks)
{
    PTP_POOL pool = CreateThreadpool(NULL);
    if(!pool)
    {
        throw std::runtime_error("Fail to CreateThreadpool.");
    }

    PTP_WORK work = CreateThreadpoolWork(&WorkProc, NULL, NULL);
    if(!work)
    {
        CloseThreadpool(pool);
        throw std::runtime_error("Fail to CreateThreadpoolWork.");
    }

    for(int i = 0; i < numberOfThreadpoolWorks; i++)
    {
        SubmitThreadpoolWork(work);
    }

    CloseThreadpoolWork(work);
    CloseThreadpool(pool);
}

void Test1()
{
    // Post 100 thread pool work. The thread pool object is closed 
    // immediately.
    PostThreadpoolWorks(100);
    
    // keep monitoring the number of threads for 70 seconds.
    DWORD tick = GetTickCount();
    for(int i = 0; i < 100; i++)
    {
        cout << (GetTickCount() - tick) / 1000.0 << " " << GetNumOfThreads() << " " << g_activeThreadCount << endl;
        Sleep(1000);
    }
}

As each work item is finished in 5 seconds, the number of running threads (green) is 100 just for a few seconds. However, the number of threads in the process (red) is maintained for the next 68 seconds.

To confirm the observation, I started threads so that the number of running threads forms the triangle.

It indicates that the process maintains the maximum number of threads used within the last 65 to 70 seconds even though the thread pool object is closed.

Here is the program used in the experiment.

DWORD GetNumOfThreads()
{
    HANDLE hThreadSnap = INVALID_HANDLE_VALUE;
    THREADENTRY32 te32;

    hThreadSnap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
    if(hThreadSnap == INVALID_HANDLE_VALUE)
    {
        return (DWORD)-1;
    }

    te32.dwSize = sizeof(THREADENTRY32);
    
    if(!Thread32First(hThreadSnap, &te32))
    {
        CloseHandle(hThreadSnap);
        return (DWORD)-1;
    }
    
    DWORD count = 0;
    do
    {
        if( te32.th32OwnerProcessID == GetCurrentProcessId() )
        {
            count++;
        }
    } while( Thread32Next(hThreadSnap, &te32 ) );
    
    CloseHandle( hThreadSnap );
    return count;
}

LONG g_activeThreadCount = 0;

void CALLBACK WorkProc(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work)
{
    InterlockedIncrement(&g_activeThreadCount);
    Sleep(5000);
    InterlockedDecrement(&g_activeThreadCount);
}

void PostThreadpoolWorks(int numberOfThreadpoolWorks)
{
    PTP_POOL pool = CreateThreadpool(NULL);
    if(!pool)
    {
        throw std::runtime_error("Fail to CreateThreadpool.");
    }

    PTP_WORK work = CreateThreadpoolWork(&WorkProc, NULL, NULL);
    if(!work)
    {
        CloseThreadpool(pool);
        throw std::runtime_error("Fail to CreateThreadpoolWork.");
    }

    for(int i = 0; i < numberOfThreadpoolWorks; i++)
    {
        SubmitThreadpoolWork(work);
    }

    CloseThreadpoolWork(work);
    CloseThreadpool(pool);
}

void Test1()
{
    // Post 100 thread pool work. The thread pool object is closed 
    // immediately.
    PostThreadpoolWorks(100);
    
    // keep monitoring the number of threads for 70 seconds.
    DWORD tick = GetTickCount();
    for(int i = 0; i < 100; i++)
    {
        cout << (GetTickCount() - tick) / 1000.0 << " " << GetNumOfThreads() << " " << g_activeThreadCount << endl;
        Sleep(1000);
    }
}

DWORD CALLBACK Test2Proc(LPVOID param)
{
    // Post some thread pool works for every 5 seconds.
    for(int i = 0; i < 10; i++)
    {
        PostThreadpoolWorks(i * 50);
        Sleep(5000);
    }
    for(int i = 10; i >= 0; i--)
    {
        PostThreadpoolWorks(i * 50);
        Sleep(5000);
    }
    return 0;
}

void Test2()
{
    // Start threads to post thread pool works.
    HANDLE hThread = ::CreateThread(NULL, 0, &Test2Proc, 0, 0, NULL);
 
    // keep monitoring the number of threads for 200 seconds.
    DWORD tick = GetTickCount();
    for(int i = 0; i < 200; i++)
    {
        cout << (GetTickCount() - tick) / 1000.0 << " " << GetNumOfThreads() << " " << g_activeThreadCount << endl;
        Sleep(1000);
    }

    CloseHandle(hThread);
}

int _tmain(int argc, _TCHAR* argv[])
{
    Test1();
    Test2();
	return 0;
}
Posted in Advanced Debugging, C++ | Tagged , | 1 Comment

std::unique_ptr for Windows handles

To close a Windows handle with std::unique_ptr, we need to define a type with operator() and specify it as the second template parameter.

struct Deleter
{
    // By defining the pointer type, we can delete a type other than T*.
    // In other words, we can declare unique_ptr<HANDLE, Deleter> instead of
    // unique_ptr<void, Deleter> which leaks the HANDLE abstraction.
    typedef HANDLE pointer;

    void operator()(HANDLE h)
    {
        if(h != INVALID_HANDLE_VALUE)
        {
            CloseHandle(h);
        }
    }
};

void OpenAndWriteFile()
{
    // Specify a deleter as a template argument.
    std::unique_ptr<HANDLE, Deleter> file(CreateFile(_T("test.txt"),
                                                     GENERIC_WRITE,
                                                     0,
                                                     NULL,
                                                     CREATE_ALWAYS,
                                                     FILE_ATTRIBUTE_NORMAL,
                                                     NULL));
    if(file.get() != INVALID_HANDLE_VALUE)
    {
        DWORD size;
        WriteFile(file.get(), "Hello World", 11, &size, NULL);
    }
}

This is different from std::shared_ptr where we can just specify the functor in a constructor parameter.

    std::shared_ptr<void> file(CreateFile(_T("test.txt"),
                                          GENERIC_WRITE,
                                          0,
                                          NULL,
                                          CREATE_ALWAYS,
                                          FILE_ATTRIBUTE_NORMAL,
                                          NULL),
                                CloseHandle);   // Specify deleter functor as a constructor parameter.

Although it’s extra work to define a new deleter type, std::unique_ptr has following advantage.

First, the smart pointer object doesn’t have to keep the deleter object, thus the size of the pointer becomes smaller. In fact, std::unique_ptr occupies the just same size as T*.

Second, Application can overwrite the pointer type. It means we can write:

std::unique_ptr<HANDLE, Deleter> foo(...);

instead of this HANDLE abstraction leakage version.

std::unique_ptr<void, Deleter> foo(...);
Posted in C++ | Tagged , | 4 Comments

HRD Emulator in HTML5

Just like MPEG-2 video uses VBV (Video Buffer Verifier), H.264 standard uses HRD (Hypothetical Reference Decoder) to define correctness of streams.

One of the verification provided by HRD is CPB buffer fullness. The idea is simple; feed H.264 stream to HRD and monitor the CPB fullness. Make sure the fullness never goes negative nor above the CPB buffer size. The importance of this buffer fullness requirement was explained here.

HRD defines the CPB fullness in somewhat counter intuitive way. To make it easy, I wrote an interactive HRD emulator in HTML5.

HRD Emulator (CBR)

The blue line the number of bits entered to HRD over time. The input rate is constant in CBR mode, i.e. the blue line is straight. The red line is the number of bits removed (= Decoded) from CPB. Thus, the CPB fullness is the blue line minus the red line for a given time.

Let’s select the VBR in Rate Control Mode.

HRD Emulator (VBR)

HRD Emulator (VBR)

The blue line now has some periods where the input rate is zero. H.264 standard uses init_cpb_removal_delay to control the period; Bits for a picture N can enter HRD only after the picture decode time minus init_cpb_removal_delay.

To see this rule more clearly, check “Show init_cpb_removal_delay” option. When the last bit of picture N – 1 arrives before the picture N decoding time minus init_cpb_removal_delay, the input rate goes down to zero.

HRD (VBR) with init_cpb_removal_delay

HRD (VBR) with init_cpb_removal_delay

The value of init_cpb_removal_delay also determines the initial decoding delay. It’s currently set to 50 ms. It means the first picture is decoded 50 ms after the first bit of the picture enters HRD.

The initial decoding delay determines the channel switching delay as discussed here. Can we reduce it to make channel switching faster?

HRD underflow due to the short initial delay

Not really. As shown above, reducing the value of init_cpb_removal_delay resulted in the underflow error (the red line crosses the blue line).

H.264 standard has introduced another parameter to allow the reduction of initial decoding delay at the expense of increased average bit rate. Here we have init_cpb_removal_delay_offset parameter. It’s added to init_cpb_removal_delay for every picture except the first one.

HRD with non-zero init_cpb_removal_delay_offset

H.264 HRD provides these parameters to encoders so that they can choose optimal bit stream configuration for each target devices.

Here are HTML and Java Script of the HRD emulator above.

<html>
  <head>
    <title>HRD Emulator</title>
    <script src="./script.js" type="text/javascript"></script>
    
    <style type="text/css">
      #plot {
        float : left
      }
      #control-container {
        float : left
      }
      #widget {
        float : right;
        width : 10em
      }
      #heading {
        float : left
      }
    </style>    
  </head>
  <body onLoad="init()">
    <div id="plot">
      <canvas id="canvas" width="800" height="480"></canvas>
    </div>

    <div id="control-container">
      <div class="control">
        <div class="heading">HRD bit_rate [<span id="showBitRate"> </span> kbps]</div>
        <div class="widget">
          <input id="bitRate" onChange="update()" type="range" min="0" max="2000" value="500"/>
        </div>
      </div>
      <div class="control">
        <div class="heading">initial_cpb_removal_delay [<span id="showICRD"> </span> ms]</div>
        <div>
          <input id="initial_cpb_removal_delay" onChange="update()" type="range" min="0" max="100" value="50"/>
        </div>
      </div>
      <div class="control">
        <div class="heading">initial_cpb_removal_delay_offset [<span id="showICRDO"> </span> ms]</div>
        <div>
          <input id="initial_cpb_removal_delay_offset" onChange="update()" type="range" min="0" max="100" value="0"/>
        </div>
      </div>
      <div class="control">
        <div>Rate Control Mode</div>
        <div>
          <input type="radio" name="mode" value="cbr" onClick="setMode('cbr'); update()" checked="checked" />CBR
        </div>
        <div>
          <input type="radio" name="mode" value="vbr" onClick="setMode('vbr');  update()" />VBR
        </div>
      </div>
      <div class="control">
        <div>
          <input type="checkbox" id="isICRDVisible" onClick="update()"/>Show init_cpb_removal_delay
        </div>
      </div>
      <div class="control">
        <div>
          <input type="checkbox" id="isICRDOVisible" onClick="update()"/>Show init_cpb_removal_delay_offset
        </div>
      </div>
      <div class="control">
        <input id="reset" type="button" value="Redraw Graph" onClick="resetStream(); update();"/>
      </div>
    </div>
  </body>
</html>
// Save this as "script.js"
var canvas;
var picture = new Array();
var pictureDuration                  = 900900;
var initial_cpb_removal_delay        = 900900;
var initial_cpb_removal_delay_offset = 90090;
var cbr_flag = true;

function init()
{
    canvas = document.getElementById('canvas');
    resetStream();
    update();
}

function resetStream()
{
    var bit_rate = document.getElementById('bitRate').value * 1000;
    var averagePicSize = pictureDuration / 27000000 * bit_rate / 8;
    for(var i = 0; i < 10; i++){
        picture[i] = averagePicSize + (Math.random() - 0.5) * averagePicSize;
    }
}

function tickToX(tick)
{
    return tick / 900900 * 64;
}

function levelToY(level)
{
    return canvas.height - level / 50;
}

function drawDecodePictures(ctx)
{
    ctx.strokeStyle = 'rgba(255, 0, 0, 255)';

    ctx.beginPath();
    ctx.moveTo(0, levelToY(0));
    ctx.lineTo(tickToX(initial_cpb_removal_delay), levelToY(0));

    var prevX = tickToX(initial_cpb_removal_delay)
    var prevLevel = 0;
    for(var i = 0; i < 50; i++){
        var level = prevLevel + picture[i];
        var x1 = prevX;
        var x2 = prevX + tickToX(pictureDuration)
        var y  = levelToY(level);

        ctx.lineTo(x1, y);
        ctx.lineTo(x2, y);
        prevLevel = level;
        prevX     = x2
    }
    ctx.stroke();
}

function levelFromPeriod(period, bitRate)
{
    return period / 27000000.0 * bitRate / 8;
}

function drawInputData(ctx)
{
    var t_af          = new Array();
    var t_ai          = new Array();
    var t_ai_earliest = new Array();
    var t_rn          = new Array();

    var bit_rate = document.getElementById('bitRate').value * 1000;

    t_ai[0]   = 0;
    t_af[0]   = picture[0] * 8 * 27000000 / bit_rate;
    t_rn[0]   = initial_cpb_removal_delay;

    for(var n = 1; n < picture.length; n++)
    {
        t_rn[n] = t_rn[n - 1] + pictureDuration;
        if(cbr_flag == true)
        {
            t_ai[n] = t_af[n - 1];
        }
        else
        {
            t_ai_earliest = t_rn[n] - (initial_cpb_removal_delay + initial_cpb_removal_delay_offset);
            t_ai[n] = Math.max(t_af[n - 1], t_ai_earliest);
        }
        t_af[n] = t_ai[n] + picture[n] * 8 * 27000000 / bit_rate;
    }

    ctx.strokeStyle = 'rgba(0, 0, 255, 255)';
    ctx.beginPath();
    ctx.moveTo(tickToX(0), levelToY(0));

    var level = 0;
    for(var n = 0; n < picture.length; n++)
    {
        ctx.lineTo(tickToX(t_ai[n]), levelToY(level));
        level += picture[n];
        ctx.lineTo(tickToX(t_af[n]), levelToY(level));
    }
    ctx.stroke();

    level = 0;
    for(var n = 0; n < picture.length; n++)
    {
        var isICRDVisible = document.getElementById("isICRDVisible").checked;
        if(isICRDVisible){
            // draw initial_cpb_removal_delay period
            var icrd_x1 = tickToX(t_rn[n] - initial_cpb_removal_delay)
            var icrd_x2 = tickToX(t_rn[n])
            var icrd_y  = levelToY(level) - 4

            ctx.strokeStyle = 'rgba(0, 0, 0, 255)';
            ctx.beginPath();
            ctx.moveTo(icrd_x1, icrd_y);
            ctx.lineTo(icrd_x2, icrd_y);
            ctx.stroke();

            ctx.beginPath();
            ctx.moveTo(icrd_x1 + 2, icrd_y - 2);
            ctx.lineTo(icrd_x1, icrd_y);
            ctx.lineTo(icrd_x1 + 2, icrd_y + 2);
            ctx.stroke();

            ctx.beginPath();
            ctx.moveTo(icrd_x2 - 2, icrd_y - 2);
            ctx.lineTo(icrd_x2, icrd_y);
            ctx.lineTo(icrd_x2 - 2, icrd_y + 2);
            ctx.stroke();
        }

        var isICRDOVisible = document.getElementById("isICRDOVisible").checked;
        if(isICRDOVisible){
            // draw initial_cpb_removal_delay period
            var icrdo_x1 = tickToX(t_rn[n] - initial_cpb_removal_delay - initial_cpb_removal_delay_offset)
            var icrdo_x2 = icrd_x1
            var icrdo_y  = levelToY(level) - 4

            ctx.strokeStyle = 'rgba(0, 0, 0, 255)';
            ctx.beginPath();
            ctx.moveTo(icrdo_x1, icrdo_y);
            ctx.lineTo(icrdo_x2, icrdo_y);
            ctx.stroke();

            ctx.beginPath();
            ctx.moveTo(icrdo_x1 + 2, icrdo_y - 2);
            ctx.lineTo(icrdo_x1, icrdo_y);
            ctx.lineTo(icrdo_x1 + 2, icrdo_y + 2);
            ctx.stroke();

            ctx.beginPath();
            ctx.moveTo(icrdo_x2 - 2, icrdo_y - 2);
            ctx.lineTo(icrdo_x2, icrdo_y);
            ctx.lineTo(icrdo_x2 - 2, icrdo_y + 2);
            ctx.stroke();
        }
        level += picture[n];
    }
}

function setMode(mode)
{
  cbr_flag = mode == "cbr";
}

function updateUI(uiName, valueName)
{
    document.getElementById(uiName).innerHTML = document.getElementById(valueName).value
}

function update()
{
    var ctx    = canvas.getContext('2d');
    ctx.fillStyle = 'rgba(255, 255, 255, 255)';
    ctx.fillRect(0, 0, canvas.width, canvas.height);

    updateUI("showBitRate", "bitRate");
    updateUI("showICRD", "initial_cpb_removal_delay");
    updateUI("showICRDO", "initial_cpb_removal_delay_offset");

    initial_cpb_removal_delay = document.getElementById('initial_cpb_removal_delay').value * 27000;
    initial_cpb_removal_delay_offset = document.getElementById('initial_cpb_removal_delay_offset').value * 27000;

    drawDecodePictures(ctx);
    drawInputData(ctx);
};
Posted in Video | Tagged , , , , | 1 Comment

XSLT Example: Add a new node to elements

How to add a new element to every child element of a certain parent?

For example, consider you want to add <type> element to the element under <library>. The <type> element should have the name of the element as the value.

<library>
  <book>
    <title>Programming Erlang</title>
    <author>Armstrong</author>
  </book>
  <magazine>
    <title>TAMIYA TOURING CAR</title>
  </magazine>
</library>

The resultant XML should look like this.

<?xml version="1.0"?>
<library>
  <book>
    <type>book</type>
    <title>Programming Erlang</title>
    <author>Armstrong</author>
  </book>
  <magazine>
    <type>magazine</type>
    <title>TAMIYA TOURING CAR</title>
  </magazine>
</library>

The following XSLT does the trick.

<?xml version="1.0" ?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!-- If an element is a child of library, copy the content and add
       type element. -->
  <xsl:template match="library/*">
    <xsl:copy>
      <xsl:element name="type">
        <xsl:value-of select="name(.)"/>
      </xsl:element>

      <xsl:call-template name="copy-children"/>
    </xsl:copy>
  </xsl:template>

  <!-- Copy the children of the current node. -->
  <xsl:template name="copy-children">
    <xsl:copy-of select="./*"/>
  </xsl:template>

  <!-- Generic identity template -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Posted in Tips | Tagged | 2 Comments

TTML, DFXP and SMPTE-TT

TTML is a new way to put text (caption and/or subtitle) in Web video. The standard is published by W3C (http://www.w3.org/TR/ttaf1-dfxp/).

It’s a lengthy standard – it could be difficult to answer even simple questions like these.

What does it specify? What is DFXP? What is SMPTE-TT?

We need to realize there are three parties in the field. First, there are Web video player developers like Microsoft (Silverlight), and Google (HTML5). They use TTML to display captions on the video player. Second, there are movie producers who author captions. Third, there are traditional broadcasters like CNN. They have large video assets with closed captions and want to convert them to TTML for over-the-top delivery.

It’s not good idea to write huge standard to cover all these parties. Better way is to separate the base standard (=features) and the implementation requirement (=profiles).

The base standard defines various features needed for captioning. For example, there is a feature called #timing. To claim the support, the implementation must understand the timing information associated with the text. There is another a feature called #direction. To claim the support, the implementation must support right-to-left text rendering like Arabic language.

It’s tedious to claim the support of each feature. Profile is a group of features needed for a typical use case. For example, “DFXP Presentation” profile contains a set of features needed for caption rendering. If your implementation (=Web video player) supports all the features in the profile, you can claim the support of the profile.

Base and Profile Specification

It’s a clever way to design a standard. A profile usually contains just a few features, thus the implementation is easy and simple. There are (and will be) many profiles. They have good compatibility because they share the features defined in the base standard.

The TTML recommendation specifies followings.

  • Syntax and semantic of each feature. (This is what the base standard for)
  • The way to claim a given TTML document follows a certain profile.
  • Three profiles: DFXP Presentation, DFXP transform, and DFXP full.

DFXP profiles are defined in TTML recommendation because the base standard itself is useless without some profiles. DFXP stands for “Distribution Format Exchange Profile”. DFXP Presentation profile is used by video players which support TTML captions. DFXP Transform profile is used for video editing stations where they need to convert from/to various other caption format. DFXP Full profile includes all the features defined in the base standard.

So far, so good. Let’s move on.

SMPTE is a standard organization for TV and film industry. SMPTE found TTML useful but the feature set is not sufficient.

  • No bitmap images. The feature is needed because a caption format in Europe (=DVB subtitle) uses a sequence of bitmaps and SMPTE wants to render them in TTML enabled players.
  • No binary data pass-through. SMPTE wants to carry CEA-708 bit stream data from source to destination in TTML format.
  • No information about how the caption is designed. SMPTE wants to use TTML in two different ways. One is keeping look & feel of the legacy caption (=preserve mode). The other is to use the maximum presentation features of TTML to make the caption nicer (= enhance mode). SMPTE needs a feature to tell which mode is in use.

SMPTE decided to extend the TTML to meet their requirement. First, SMPTE defines three new features. (They are called “extensions” because they are not the part of official W3C TTML recommendations)

  • #image: Feature to use bitmap image as a caption text
  • #data: Feature to carry binary data.
  • #information: Feature to declare which translation mode (preserve or enhanced) is in use.

SMPTE also added a new profile called SMPTE-TT. To claim the support of SMPTE-TT profile, the implementation must support DFXP Full profile with the three new features added by SMPTE.

 

Posted in Video | Tagged , , , | 11 Comments

Debug Windows Service through Remote Desktop

Debugging Windows NT Service is headache. Doing so through remote desktop is even more difficult. But the remote debugging makes the story easier.

Let’s review the background.

Debugging a service is challenging because the service process starts before you log-in to the operating system. It’s not possible to debug a service process by starting CDB:


# Doesn't work!
cdb "C:\Program Files\MyService.exe"

Instead, you have to edit the Image File Execution Option registry key.

The debugger starts when the service process is initiated. But there is another problem. The debugger starts in session-0 as this is where the service process is running. In Windows Vista and later, the session-0 is not “interactive”. It means the debugger window is not visible to the console user.

A good news is that you can make a service to interact with a special desktop by setting an option.

The knowledge base below explains the story up to here. It’s worth reading but there is a bad news which is not covered by the article.
http://support.microsoft.com/kb/824344

A problem is that the interactive service doesn’t work when you are debugging through remote desktop. In other words, the special desktop is not available to he remote desktop users.

You can solve this problem by using remote debugging. First, use the image file execution option to start a debugger with -server option.

Then access the target from the remote desktop console:

It enables you to debug Windows NT services without using session-0 interactive desktop.

Posted in Advanced Debugging, Tips | Leave a comment

Shuffling an array

In-Place swapping is a common way to shuffle an array. However, it’s easy to make an implementation mistake.

When the size of the array is N, there are N! permutations. The goal is, after the shuffling operation, each permutation shall happen in the probability of 1/N!.

The first (incorrect) example is swapping two random indexes.

for (int i = 0; i < array.Length; i++)
{
  int swapIndex1 = rand.Next(0, array.Length);
  int swapIndex2 = rand.Next(0, array.Length);
  int temp = array[swapIndex1];
  array[swapIndex1] = array[swapIndex2];
  array[swapIndex2] = temp;
}

This is incorrect because an array shuffled in an iteration can be back to the original state in another iteration. If you shuffle many times and plot the number of occurrences of each permutation, certain permutations happens much more often than other permutations as shown below.

How about calling random function just once per iteration? Nice try but be careful on the random range. A common mistake is that, for each iteration, the swapping target is selected from the whole array.

for (int i = 0; i < array.Length; i++)
{
  int swapIndex = rand.Next(0, array.Length);
  int temp = array[i];
  array[i] = array[swapIndex];
  array[swapIndex] = temp;
}

As this algorithm still allows the array state to go back to the original state, the probability of each permutation is not uniform. The graph plot is shown below.

The correct way is to get the random of the arrray[i .. N – 1] for each iteration i.

for (int i = 0; i < array.Length; i++)
{
  # Take swap index [i ... N-1]
  int swapIndex = rand.Next(i, array.Length);
  int temp = array[i];
  array[i] = array[swapIndex];
  array[swapIndex] = temp;
}

The graph is shown below. Each permutation occurs with roughly same probabilities.

The whole program used for the testing is shown below.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace PermutationTest
{
    class Program
    {
        static int CalcPermIndex(int[] array)
        {
            int permIndex = 0;
            foreach (int v in array)
            {
                permIndex <<= 3;
                permIndex += v;
            }
            return permIndex;
        }

        static int Shuffle1(Random rand)
        {
            int[] array = { 0, 1, 2, 3, 4, 5 };

            for (int i = 0; i < array.Length; i++)
            {
                int swapIndex1 = rand.Next(0, array.Length);
                int swapIndex2 = rand.Next(0, array.Length);
                int temp = array[swapIndex1];
                array[swapIndex1] = array[swapIndex2];
                array[swapIndex2] = temp;
            }
            return CalcPermIndex(array);
        }

        static int Shuffle2(Random rand)
        {
            int[] array = { 0, 1, 2, 3, 4, 5 };

            for (int i = 0; i < array.Length; i++)
            {
                int swapIndex = rand.Next(0, array.Length);
                int temp = array[i];
                array[i] = array[swapIndex];
                array[swapIndex] = temp;
            }
            return CalcPermIndex(array);
        }

        static int Shuffle3(Random rand)
        {
            int[] array = { 0, 1, 2, 3, 4, 5};

            for (int i = 0; i < array.Length; i++)
            {
                int swapIndex = rand.Next(i, array.Length);
                int temp = array[i];
                array[i] = array[swapIndex];
                array[swapIndex] = temp;
            }
            return CalcPermIndex(array);
        }

        static void Add(Dictionary<int, int[]> counter, int index, int pos)
        {
            if (counter.ContainsKey(index))
            {
                counter[index][pos]++;
            }
            else
            {
                int[] array = new int[3];
                array[pos] = 1;
                counter.Add(index, array);
            }

        }
        static void Main(string[] args)
        {
            Random rand = new Random();
            Dictionary<int, int[]> counter = new Dictionary<int, int[]>();

            for (int i = 0; i < 10000000; i++)
            {
                Add(counter, Shuffle1(rand), 0);
                Add(counter, Shuffle2(rand), 1);
                Add(counter, Shuffle3(rand), 2);
            }

            foreach (var i in counter.Values)
            {
                Console.WriteLine("{0} {1} {2}", i[0], i[1], i[2]);
            }
        }
    }
}
Posted in Algorithm | Tagged , , | Leave a comment