// L-13 MCS 572 Wed 8 Feb 2012 : parsum_tbb.cpp
// Illustration of parallel_reduce to sum integers.
// Adapted from the Intel Threading Building Blocks Tutorial.

#include <cstdlib>
#include <iostream>
#include "tbb/tbb.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_reduce.h"
#include "tbb/task_scheduler_init.h"

using namespace std;
using namespace tbb;

// Reduction body for parallel_reduce: adds up the entries of an int array
// over the index ranges it is given.  The running total is kept in the
// public member sum.
class SumIntegers
{
   int *data;   // array being summed; this class never frees it
   public:
      int sum;  // total of all entries this body has processed so far

      // Starts a body over the array d with a zero total.
      SumIntegers ( int *d ) : data(d), sum(0) {}

      // Splitting constructor: the new body reads the same array as x
      // but begins with its own zero total.
      SumIntegers ( SumIntegers& x, split ) :
         data(x.data), sum(0) {}

      // Adds data[r.begin()], ..., data[r.end()-1] to sum.
      // The total starts from the current value of sum, not from zero,
      // so that what was accumulated on earlier calls is kept.
      void operator()
         ( const blocked_range<size_t>& r )
      {
         int total = sum;
         const int *values = data;
         const size_t stop = r.end();
         size_t idx = r.begin();
         while(idx != stop)
         {
            total += values[idx];
            ++idx;
         }
         sum = total;
      }

      // Merges the total gathered by x into this body.
      void join ( const SumIntegers& x ) { sum += x.sum; }
};

int ParallelSum ( int *x, size_t n )
{
   SumIntegers S(x);

   parallel_reduce(blocked_range<size_t>(0,n), S);

   return S.sum;
}

// Entry point.  Reads n from argv[1] when given, otherwise prompts for it
// on standard input; fills an array with 1, 2, ..., n; sums the array with
// ParallelSum; and prints the result next to the closed form n(n+1)/2.
// Returns 0 on success, 1 when n cannot be read, is negative, or the
// array cannot be allocated.
int main ( int argc, char *argv[] )
{
   int n;
   if(argc > 1)
      n = std::atoi(argv[1]);
   else
   {
      std::cout << "give n : ";
      if(!(std::cin >> n))
      {
         std::cerr << "could not read n" << std::endl;
         return 1;
      }
   }
   // A negative n would wrap around to a huge unsigned value both in the
   // allocation and in the range given to ParallelSum, so reject it here.
   if(n < 0)
   {
      std::cerr << "n must be nonnegative" << std::endl;
      return 1;
   }
   const std::size_t count = static_cast<std::size_t>(n);

   int *d = static_cast<int*>(std::calloc(count,sizeof(int)));
   // calloc may legitimately return a null pointer for a zero-sized request
   if(!d && count > 0)
   {
      std::cerr << "could not allocate " << n << " integers" << std::endl;
      return 1;
   }
   for(int i=0; i<n; i++) d[i] = i+1;

   task_scheduler_init init
      (task_scheduler_init::automatic);
   int s = ParallelSum(d,count);

   // The reference value is computed in long long: n*(n+1) no longer fits
   // in a 32-bit int from n = 46341 on, although the sum itself still
   // does up to n = 65535.  Beyond that the int returned by ParallelSum
   // can no longer hold the sum and the two printed numbers will differ.
   const long long expected = static_cast<long long>(n)*(n+1LL)/2;

   std::cout << "the sum is " << s
             << " and it should be " << expected
             << std::endl;

   std::free(d);   // release the buffer obtained from calloc
   return 0;
}

