#include <iostream>
#include <bitset>
#include  <type_traits>
#include  <array>
#include  <limits>

// compile -O2 -std=c++17

// Bit-by-bit reversal, usable at compile time.
// Used to prepare the table of reversed blocks.
//
// v    : value whose lowest `size` bits are reversed; any higher bits of v are ignored.
// size : number of low-order bits to reverse.
// Returns an int whose bit (size-1-pos) equals bit pos of v, for 0 <= pos < size.
//
// The work is done on unsigned int: with a signed v, `v % 2` is -1 for negative
// odd values (which would set every bit of the result), and left-shifting a
// signed accumulator can overflow.
constexpr
int reverse( int v, int size )
{
    unsigned int bits = static_cast<unsigned int>( v );
    unsigned int r = 0;
    for ( int pos = 0; pos < size; ++pos ) {
        // copy bits[pos] to r[size-1-pos]: make room, then append the current lowest bit
        r = (r << 1) | (bits & 1u);
        bits >>= 1;
    }
    return static_cast<int>( r );
}

// Width, in bits, of one precomputed block. Intended values: 4, 8 or 16.
// 32 is not practical: it is too big to build at compile time and the table
// would need 2^32 int entries.
// The value has to be a divisor of the number of bits in the reversed number.
constexpr unsigned SIZE{ 16 };

// Builds, at compile time, the table of bit-reversed indices: entry i holds
// reverse( i, SIZE ). The number of entries N is passed as the value of a
// std::integral_constant. With a block width of 4 the table would start:
// [0] 0000 -> 0000
// [1] 0001 -> 1000
// [2] 0010 -> 0100
// [3] 0011 -> 1100
// ...
template<typename T, T N>
constexpr auto make_constexpr_array(std::integral_constant<T, N>)
{
    std::array<int,N> table = {};
    int index = 0;
    for ( auto & entry : table ) {
        // elements are visited in index order, so `index` always matches the slot being filled
        entry = reverse( index, SIZE );
        ++index;
    }
    return table;
}

// The value 1 typed as unsigned long long, so that expressions such as
// one<<SIZE are evaluated in unsigned long long arithmetic.
constexpr unsigned long long one{ 1 };
// Precomputed table with one<<SIZE entries, produced by make_constexpr_array.
constexpr auto reversed_block =
    make_constexpr_array( std::integral_constant< unsigned long long, (one << SIZE) >{} );

// Run-time bit reversal of an integral value using the precomputed table.
//
// n is consumed SIZE bits at a time, lowest block first. Each block is looked
// up in reversed_block and appended below the bits already produced, so the
// lowest block of n ends up as the highest block of the result.
// Example using blocks of size 4:
//   n      = 1234 abcd 5678
//   step 1 : 8765               ( 5678 -> 8765 was precomputed )
//   step 2 : 8765 dcba          ( abcd -> dcba was precomputed )
//   step 3 : 8765 dcba 4321     ( 1234 -> 4321 was precomputed )
//
// If the bit width of T is not a multiple of SIZE (including T narrower than
// SIZE), the remaining high-order bits of n are reversed through the same table
// and the unused low bits of the table entry are shifted away, instead of
// being silently dropped.
//
// T      : a non-bool integral type no wider than unsigned long long.
// Returns: n with the order of all of its bits reversed. For signed T the bit
//          pattern is reversed and converted back to T.
template< typename T>
T reverseBits(T n)
{
    static_assert( std::is_integral<T>::value && !std::is_same<std::remove_cv_t<T>, bool>::value,
                   "reverseBits requires a non-bool integral type" );
    static_assert( sizeof(T) <= sizeof(unsigned long long),
                   "reverseBits works internally in unsigned long long" );

    using U = std::make_unsigned_t<T>;
    constexpr unsigned int total_bits  = std::numeric_limits<U>::digits;
    constexpr unsigned int full_blocks = total_bits / SIZE;
    constexpr unsigned int tail_bits   = total_bits % SIZE;
    constexpr unsigned long long block_mask = (one << SIZE) - 1;

    // Work on the unsigned bit pattern: avoids negative table indices and
    // signed shifts, and sidesteps integral promotion of narrow types.
    unsigned long long src = static_cast<U>( n );
    unsigned long long rev = 0;

    for ( unsigned int i = 0; i < full_blocks; ++i ) {
        // move previously created bits left and fill the freed SIZE bits
        // with the reversed lowest block of src
        rev = (rev << SIZE) | static_cast<unsigned long long>( reversed_block[src & block_mask] );
        src >>= SIZE;
    }

    if constexpr ( tail_bits != 0 ) {
        // src now holds fewer than SIZE bits. The table reverses it as a full
        // SIZE-bit block, leaving the wanted bits at the top of that block.
        const unsigned long long tail =
            static_cast<unsigned long long>( reversed_block[src & block_mask] ) >> (SIZE - tail_bits);
        rev = (rev << tail_bits) | tail;
    }

    return static_cast<T>( static_cast<U>( rev ) );
}

// Demo driver: prints a few values and their bit-reversed counterparts as
// fixed-width binary strings, for unsigned int and for unsigned long long.
int main()
{
//    {   // do not use with SIZE = 16
//        unsigned char a[] = { 
//            0b11100001,
//            0b10100111,
//            0b11110001 
//        };
//        for ( auto & v : a ) {
//            std::cout << "v = " << std::bitset< sizeof(char)*8 >( v )  << std::endl;
//            std::cout << "r = " << std::bitset< sizeof(char)*8 >( reverseBits( v ) )  << std::endl;
//        }
//    }

    // Prints one value ("v = ...") followed by its reversal ("r = ..."),
    // each as a bit string as wide as the value's type.
    const auto show = []( auto value ) {
        using V = decltype( value );
        constexpr auto width = sizeof(V)*8;
        std::cout << "v = " << std::bitset< width >( value )  << std::endl;
        std::cout << "r = " << std::bitset< width >( reverseBits( value ) )  << std::endl;
    };

    const unsigned int words[] = {
        1, 832764, 12987324
    };
    for ( const auto word : words ) {
        show( word );
    }

    const unsigned long long quads[] = {
        1, std::numeric_limits<unsigned long long>::max(), 2187363812763, 98732498327498327
    };
    for ( const auto quad : quads ) {
        show( quad );
    }
}