Condition variables performance of boost, Win32, and the C++11 standard library

About four years ago, I wrote that the condition variables in the Win32 API were much faster than the boost threading library implementation. Since then, the boost threading library has been rewritten and C++11 has introduced threading support in the standard library. Let’s revisit the benchmark.

The test program is a bounded FIFO implemented with two condition variables. It passes 10,000,000 integers from the producer to the consumer. The test is run 50 times in the following environment.

– Intel(R) Core (TM) i7 950@3GHz.
– Windows 7 Professional
– Visual Studio 2012 Express (Update 1)

The average elapsed time is shown here; shorter is faster. Note that each measurement is the total time for 10 million FIFO accesses. Although std::condition_variable is slower than the others, the difference is pretty much nothing.

cv_comp_average

However, the distributions of elapsed time are still interesting. Across the 50 trials, the Win32 and boost condition variables show fairly stable performance.

cv_comp_boost_dist

cv_comp_win32_dist

For an unknown reason, however, the std::condition_variable implementation in Visual Studio 2012 Update 1 sometimes takes a very long time.

cv_comp_std_dist

Here is the test program.

#include <condition_variable>
#include <deque>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

#include <boost/thread.hpp>
#include <boost/timer.hpp>

#include <windows.h>

using namespace std;

// Abstract queue of ints. The benchmark's producer and consumer loops talk to
// a queue only through this interface, so any implementation derived from it
// can be driven by the same code.
class fifo
{
public:
    // Virtual so that a derived queue can be destroyed through a fifo*.
    virtual ~fifo()
    {
    }

    // Adds v to the queue.
    virtual void push(int v) = 0;

    // Removes one value from the queue and returns it.
    virtual int pull() = 0;
};

class boost_fifo : public fifo
{
public:
    boost_fifo(std::size_t s) : max_fifo_size(s){}

    void push(int v)
    {
        boost::mutex::scoped_lock lk(mtx);
        while(buffer.size() > max_fifo_size)
        {
            cv_slot.wait(lk);
        }
        buffer.push_back(v);
        cv_data.notify_one();
    }

    int pull()
    {
        boost::mutex::scoped_lock lk(mtx);
        while(buffer.empty())
        {
            cv_data.wait(lk);
        }
        int v = buffer.front();
        buffer.pop_front();
        cv_slot.notify_one();
        return v;
    }
private:
    std::size_t max_fifo_size;
    boost::mutex mtx;
    boost::condition_variable cv_slot;
    boost::condition_variable cv_data;
    std::deque<int> buffer;
};

class win32_fifo : public fifo
{
public:
    win32_fifo(std::size_t s) : max_fifo_size(s)
    {
        InitializeCriticalSection(&mtx);
        InitializeConditionVariable(&cv_slot);
        InitializeConditionVariable(&cv_data);
    }

    ~win32_fifo()
    {
        DeleteCriticalSection(&mtx);
    }

    void push(int v)
    {
        EnterCriticalSection(&mtx);
        while(buffer.size() > max_fifo_size)
        {
            SleepConditionVariableCS(&cv_slot, &mtx, INFINITE);
        }
        buffer.push_back(v);
        WakeConditionVariable(&cv_data);
        LeaveCriticalSection(&mtx);
    }

    int pull()
    {
        EnterCriticalSection(&mtx);
        while(buffer.empty())
        {
            SleepConditionVariableCS(&cv_data, &mtx, INFINITE);
        }

        int v = buffer.front();
        buffer.pop_front();
        WakeConditionVariable(&cv_slot);
        LeaveCriticalSection(&mtx);
        return v;
    }
private:
    std::size_t max_fifo_size;
    CRITICAL_SECTION mtx;
    CONDITION_VARIABLE cv_slot;
    CONDITION_VARIABLE cv_data;
    std::deque<int> buffer;
};

class std_fifo : public fifo
{
public:
    std_fifo(std::size_t s) : max_fifo_size(s){}

    void push(int v)
    {
        std::unique_lock<std::mutex> lk(mtx);
        while(buffer.size() > max_fifo_size)
        {
            cv_slot.wait(lk);
        }
        buffer.push_back(v);
        cv_data.notify_one();
    }

    int pull()
    {
        std::unique_lock<std::mutex> lk(mtx);
        while(buffer.empty())
        {
            cv_data.wait(lk);
        }
        int v = buffer.front();
        buffer.pop_front();
        cv_slot.notify_one();
        return v;
    }
private:
    std::size_t max_fifo_size;
    std::mutex mtx;
    std::condition_variable cv_slot;
    std::condition_variable cv_data;
    std::deque<int> buffer;
};

// Producer side of the benchmark: pushes the integers 0 .. 9,999,999, in
// ascending order, into the queue behind fifo_buffer.
void push_loop(fifo* fifo_buffer)
{
    const int item_count = 10000000;

    int next_value = 0;
    while(next_value != item_count)
    {
        fifo_buffer->push(next_value);
        ++next_value;
    }
}

// Consumer side of the benchmark: pulls 10,000,000 values from the queue
// behind fifo_buffer and discards each one.
void pull_loop(fifo* fifo_buffer)
{
    const int item_count = 10000000;

    int pulled = 0;
    while(pulled != item_count)
    {
        fifo_buffer->pull();
        ++pulled;
    }
}

int main()
{
    vector<double> elapsedTime[3];
    
    static size_t const FIFO_SIZE = 16;
    for(int i = 0; i < 50; i++)
    {
        cerr << "BOOST FIFO: " << endl;
        {
            boost::timer tim;
            unique_ptr<fifo> fifo_buffer(new boost_fifo(FIFO_SIZE));
            boost::thread push_thread(bind(push_loop, fifo_buffer.get()));
            boost::thread pull_thread(bind(pull_loop, fifo_buffer.get()));
            
            push_thread.join();
            pull_thread.join();

            elapsedTime[0].push_back(tim.elapsed());
        }

        cerr << "WIN32 FIFO: " << endl;
        {
            boost::timer tim;
            unique_ptr<fifo> fifo_buffer(new win32_fifo(FIFO_SIZE));
            boost::thread push_thread(bind(push_loop, fifo_buffer.get()));
            boost::thread pull_thread(bind(pull_loop, fifo_buffer.get()));
            
            push_thread.join();
            pull_thread.join();
            
            elapsedTime[1].push_back(tim.elapsed());
        }

        cerr << "STD FIFO: " << endl;
        {
            boost::timer tim;
            unique_ptr<fifo> fifo_buffer(new std_fifo(FIFO_SIZE));
            boost::thread push_thread(bind(push_loop, fifo_buffer.get()));
            boost::thread pull_thread(bind(pull_loop, fifo_buffer.get()));
            
            push_thread.join();
            pull_thread.join();
            
            elapsedTime[2].push_back(tim.elapsed());
        }
    }
    
    for(auto v : elapsedTime)
    {
        for(auto e : v)
        {
            cout << e << ", ";
        }
        cout << endl;
    }

    return 1;
}

I used the following R code to generate the graphs. It also contains the raw elapsed-time data.

# Raw elapsed times in seconds, one value per trial (50 values per vector).

# std::condition_variable queue
std <- c(5.183,  5.701,  7.006,  5.839,  5.524, 16.463,  8.336,  7.041,
         5.574,  5.098,  5.430,  5.034,  5.244,  6.088,  7.478,  5.557,
         5.062,  5.577,  5.266,  5.991,  5.900,  6.804,  6.452,  6.981,
         10.586, 7.152, 37.248,  5.469, 12.113,  6.645,  8.030, 10.559,
         10.695, 6.878,  6.321, 11.659,  6.210, 17.335,  5.804,  6.146,
         5.067,  5.709,  5.938,  5.718,  8.967,  7.143,  5.078,  7.472,
         5.347, 15.000)

# boost::condition_variable queue
boost <- c(6.104, 6.11, 5.862, 5.07, 5.311, 5.639, 5.402, 5.532, 5.61,
           5.686, 6.129, 5.481, 5.07, 5.602, 5.276, 6.046, 5.818, 6.209,
           6.111, 6.165, 5.828, 6.266, 5.806, 6.087, 5.708, 6.119, 6.406,
           6.096, 6.005, 6.332, 6.522, 6.91, 6.921, 5.765, 6.223, 5.444,
           6.274, 6.603, 5.717, 6.471, 5.968, 6.188, 6.608, 5.333, 5.506,
           5.507, 6.241, 5.981, 5.915, 6.515)

# Win32 CONDITION_VARIABLE queue
win32 <- c(4.356, 5.354, 4.641, 7.821, 4.664, 4.593, 7.174, 5.596, 4.493,
           4.353, 8.678, 4.459, 5.135, 4.526, 5.043, 4.997, 5.148, 5.03,
           7.421, 6.344, 7.64, 5.491, 6.101, 6.054, 6.626, 6.292, 6.29,
           6.912, 5.691, 5.991, 6.263, 6.096, 6.544, 8.024, 6.405, 6.216,
           6.157, 5.821, 7.759, 5.454, 6.364, 8.168, 8.471, 5.108, 6.192,
           5.385, 5.427, 5.855, 6.099, 5.539)

# Mean elapsed time per implementation, in the order STD, boost, win32
# (must match names.arg in the barplot below).
avg <- c(mean(std), mean(boost), mean(win32))

# Bar chart of the three means.
png("cv_comp_average.png")
barplot(avg, main="Condition Variable Comparison", names.arg = c("STD", "boost", "win32"), ylab="Elapsed Time (seconds)", ylim=c(0, 10))
dev.off()

# One histogram per implementation. All three share the same 1-second bins
# (0..50 s) and the same y range so the distributions can be compared directly.
png("cv_comp_std_dist.png")
hist(std, main="STD elapsed time histgram", xlab="Elapsed Time (seconds)", breaks=seq(0, 50, 1.0), ylim=c(0, 30))
dev.off()

png("cv_comp_boost_dist.png")
hist(boost, main="boost elapsed time histgram", xlab="Elapsed Time (seconds)", breaks=seq(0, 50, 1.0), ylim=c(0, 30))
dev.off()

# Plot the win32 samples here; this call previously passed `boost`, so the
# "win32" image was a second copy of the boost histogram.
png("cv_comp_win32_dist.png")
hist(win32, main="win32 elapsed time histgram", xlab="Elapsed Time (seconds)", breaks=seq(0, 50, 1.0), ylim=c(0, 30))
dev.off()
About these ads

About Moto

Engineer who likes coding
This entry was posted in C++, Optimization and tagged , , . Bookmark the permalink.

One Response to Condition variables performance of boost, Win32, and the C++11 standard library

  1. Jaak Ristioja says:

    Hello! Did you ever find out why the C++11 condition variables are slow or whether this has been fixed? I’m getting similar results on Linux.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s