// g++ -std=c++11 -pthread  julia_threads.cpp -DGP2
// GP1 and GP2 are almost identical
// note that actually synchronization is not required at all
// all threads write to different parts of array
#include <cstdlib>
#include <cstdio>
#include <cstring>
//#include <complex> -- slow
#include <thread>
#include <mutex>

#define DIM 500

int savebmp( int w, int h, 
        unsigned char* red, 
        unsigned char* green, 
        unsigned char* blue, 
        char const * filename );

struct cuComplex {
    double   r;
    double   i;
    cuComplex( double a, double b ) : r(a), i(b)  {}
    double magnitude2( void ) { return r * r + i * i; }
    cuComplex operator*(const cuComplex& a) {
        return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
    }
    cuComplex operator+(const cuComplex& a) {
        return cuComplex(r+a.r, i+a.i);
    }
};

int julia( int x, int y )
{ 
    const double scale = 0.05; // maximum zoom is 2
    double jx = scale * (double)(DIM/2 - x)/(DIM/2);
    double jy = scale * (double)(DIM/2 - y)/(DIM/2);

//    std::complex<double> c(-0.8, 0.156);
//    std::complex<double> a(jx, jy);
    cuComplex c(-0.8, 0.156);
    cuComplex a(jx, jy);

    int i = 0;
    for (i=0; i<255; i++) {
        a = a * a + c;
        if (a.magnitude2() > 1000)
        //if ( norm( a ) > 1000)
            return i;
    }

    return 255;
}

static std::mutex write_lock;

#ifdef GP1
void generate_pixel( unsigned char* red, unsigned char* green, unsigned char* blue, int x, int y )
{
    int offset = x + y * DIM;
    int juliaValue = julia( x, y );

    write_lock.lock(); // using mutex
    red  [offset] = 0;
    green[offset] = juliaValue;
    blue [offset] = 255-juliaValue;
    write_lock.unlock();
}
#endif

#ifdef GP2
void generate_pixel( unsigned char* red, unsigned char* green, unsigned char* blue, int x, int y )
{
    int offset = x + y * DIM;
    int juliaValue = julia( x, y );

    std::lock_guard<std::mutex> only_one_thread_writes_at_a_time( write_lock ); // using lock
    red  [offset] = 0;
    green[offset] = juliaValue;
    blue [offset] = 255-juliaValue;
}
#endif

void generate_set( unsigned char* red, unsigned char* green, unsigned char* blue, int step, int start_x )
{
    for (int y=0; y<DIM; y++) {
        for (int x=start_x; x<DIM; x+=step) {
            generate_pixel( red, green, blue, x, y );
        }
    }
 }

int main( )
{
    unsigned char* red   = new unsigned char[DIM*DIM];
    unsigned char* green = new unsigned char[DIM*DIM];
    unsigned char* blue  = new unsigned char[DIM*DIM];

    int const num_threads = 8;
    std::thread t[num_threads];

    //Launch a group of threads
    for (int i = 0; i < num_threads; ++i) {
        t[i] = std::thread( generate_set, red, green, blue, num_threads, i ); // i is copied
    }
   
    for (int i = 0; i < num_threads; ++i) {
        t[i].join();
    }
    
    savebmp( DIM, DIM, red, green, blue, "julia.bmp" );
    delete [] red;
    delete [] green;
    delete [] blue;
}

int savebmp( int w, int h, unsigned char* red, unsigned char* green, unsigned char* blue, char const * filename )
{
    FILE *f;
    unsigned char *img = NULL;
    int filesize = 54 + 3*w*h;  //w is your image width, h is image height, both int
    
    img = ( unsigned char * )malloc( 3*w*h );
    memset( img,0,sizeof( img ) );
    for( int i=0; i<w; i++ ) {
        for( int j=0; j<h; j++ ) {
            int x=i;
            int y=( h-1 )-j;
            img[( x+y*w )*3+2] = ( unsigned char )( red  [i*w+j] );
            img[( x+y*w )*3+1] = ( unsigned char )( green[i*w+j] );
            img[( x+y*w )*3+0] = ( unsigned char )( blue [i*w+j] );
        }
    }
    unsigned char bmpfileheader[14] = {'B','M', 0,0,0,0, 0,0, 0,0, 54,0,0,0};
    unsigned char bmpinfoheader[40] = {40,0,0,0, 0,0,0,0, 0,0,0,0, 1,0, 24,0};
    unsigned char bmppad[3] = {0,0,0};
    bmpfileheader[ 2] = ( unsigned char )( filesize );
    bmpfileheader[ 3] = ( unsigned char )( filesize>> 8 );
    bmpfileheader[ 4] = ( unsigned char )( filesize>>16 );
    bmpfileheader[ 5] = ( unsigned char )( filesize>>24 );
    bmpinfoheader[ 4] = ( unsigned char )( w );
    bmpinfoheader[ 5] = ( unsigned char )( w>> 8 );
    bmpinfoheader[ 6] = ( unsigned char )( w>>16 );
    bmpinfoheader[ 7] = ( unsigned char )( w>>24 );
    bmpinfoheader[ 8] = ( unsigned char )( h );
    bmpinfoheader[ 9] = ( unsigned char )( h>> 8 );
    bmpinfoheader[10] = ( unsigned char )( h>>16 );
    bmpinfoheader[11] = ( unsigned char )( h>>24 );
    f = fopen( filename, "wb" );
    fwrite( bmpfileheader,1,14,f );
    fwrite( bmpinfoheader,1,40,f );
    for( int i=0; i<h; i++ ) {
        fwrite( img+( w*( h-i-1 )*3 ),3,w,f );
        fwrite( bmppad,1,( 4-( w*3 )%4 )%4,f );
    }
    free( img );
    fclose( f );
}