#include <omp.h>

#include <chrono>
#include <cstdlib>
#include <iostream>
#include <thread>

// Number of threads requested for the OpenMP parallel region(s).
constexpr int THREADS = 8;
// Number of iterations executed by each benchmark loop.
constexpr long long N = 10000000;

/**
 * Times the same counting loop under several OpenMP loop schedules and prints
 * the wall-clock duration of each run on its own "time elapsed" line, in the
 * order: static, dynamic, guided, static with chunk 100, dynamic with chunk 10000.
 *
 * Each run is a combined `parallel for` with THREADS threads: a worksharing
 * `for` that is not enclosed in a parallel region is executed by a single
 * thread, which would make the schedule comparison meaningless. The timer is
 * restarted before every run so each printed value covers one schedule only.
 *
 * @return 0 always.
 */
int main ( ) {
    int const size = 100000;
    int a[size] = {0};

    // Monotonic clock: interval measurements must not be affected by
    // wall-clock adjustments.
    using Clock = std::chrono::steady_clock;

    // Prints the number of seconds elapsed since `since`.
    auto report = [](Clock::time_point since) {
        std::chrono::duration<double> const elapsed_seconds = Clock::now() - since;
        // Flush so each result is visible before the next run starts.
        std::cout << "time elapsed " << elapsed_seconds.count() << std::endl;
    };

    Clock::time_point start = Clock::now();
#pragma omp parallel for num_threads(THREADS) schedule(static) // one contiguous chunk per thread
    for (long long i = 0; i < N; ++i) {
        // N > size, so several threads hit the same element; the increment
        // must be atomic to avoid a data race.
#pragma omp atomic
        ++a[i%size];
    }
    report(start);

    start = Clock::now();
#pragma omp parallel for num_threads(THREADS) schedule(dynamic) // iterations are assigned one by one
    for (long long i = 0; i < N; ++i) {
#pragma omp atomic
        ++a[i%size];
    }
    report(start);

    start = Clock::now();
#pragma omp parallel for num_threads(THREADS) schedule(guided) // similar to dynamic, but chunk size decreases with time
    for (long long i = 0; i < N; ++i) {
#pragma omp atomic
        ++a[i%size];
    }
    report(start);

    start = Clock::now();
#pragma omp parallel for num_threads(THREADS) schedule(static,100) // fixed chunks of 100 iterations
    for (long long i = 0; i < N; ++i) {
#pragma omp atomic
        ++a[i%size];
    }
    report(start);

    start = Clock::now();
#pragma omp parallel for num_threads(THREADS) schedule(dynamic,10000) // chunks of 10000 iterations handed out on demand
    for (long long i = 0; i < N; ++i) {
#pragma omp atomic
        ++a[i%size];
    }
    report(start);

    return 0;
}
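// Sample output recorded from one run (machine-dependent). The values appear
// to be cumulative readings taken from a single start time, not per-schedule
// durations:
//   time elapsed 0.0342188 - static
//   time elapsed 0.263785  - dynamic has overhead
//   time elapsed 0.356578  - guided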