public class Euler792 {

    static class Mod2Pow64 {
        static final int E = 64;
        static final int D = 2 * E - 2;

        long[] FSmall = new long[D + 1];
        long[] dF = new long[D + 1];
        long[] invSmall = new long[D + 1];

        static long invOdd(long a) {
            if ((a & 1L) == 0) {
                throw new RuntimeException("invOdd called with even input");
            }
            long x = 1;
            for (int i = 0; i < 6; ++i) {
                x = x * (2L - a * x);
            }
            return x;
        }

        Mod2Pow64() {
            FSmall[0] = 1;
            for (int u = 1; u <= D; ++u) {
                long factor = (long) (2 * (u - 1) + 1);
                FSmall[u] = FSmall[u - 1] * factor;
            }

            long[] tmp = FSmall.clone();
            for (int k = 0; k <= D; ++k) {
                dF[k] = tmp[0];
                for (int i = 0; i <= D - k - 1; ++i) {
                    tmp[i] = tmp[i + 1] - tmp[i];
                }
            }

            for (int x = 1; x <= D; x += 2) {
                invSmall[x] = invOdd((long) x);
            }
        }

        long evalF(long u) {
            if (Long.compareUnsigned(u, D) <= 0)
                return FSmall[(int) u];

            long res = dF[0];
            long binomOdd = 1;
            int exp2 = 0;

            for (int k = 1; k <= D; ++k) {
                long num = u - k + 1;
                long den = k;

                int tzNum = Long.numberOfTrailingZeros(num);
                int tzDen = Long.numberOfTrailingZeros(den);

                exp2 += tzNum - tzDen;

                binomOdd *= (num >>> tzNum);
                long denOdd = (den >>> tzDen);
                binomOdd *= invSmall[(int) denOdd];

                long binom = (exp2 >= 64) ? 0L : (binomOdd << exp2);
                res += dF[k] * binom;
            }
            return res;
        }

        long oddFact(long n) {
            long res = 1;
            while (n > 0) {
                long m = (n + 1) >>> 1;
                res *= evalF(m);
                n >>>= 1;
            }
            return res;
        }

        long oddCentralBinom(long k) {
            long a = oddFact(2 * k);
            long b = oddFact(k);
            long invb = invOdd(b);
            return a * invb * invb;
        }
    }

    static long uValue(long n, Mod2Pow64 mod) {
        long base = n + 1;
        long O = mod.oddCentralBinom(base);
        long k = base;

        int pc = Long.bitCount(k);
        long B = (pc >= 64) ? 0L : (O << pc);

        long sum = 0;
        for (int j = 0; j < 64; ++j) {
            long term = B << j;
            if ((j & 1) == 1)
                term = -term;
            sum += term;

            if (j != 63) {
                long denom = k + 1;
                int tz = Long.numberOfTrailingZeros(denom);
                long denomOdd = denom >>> tz;
                long inv = Mod2Pow64.invOdd(denomOdd);

                O *= (2 * k + 1);
                O *= inv;

                k++;
                pc = Long.bitCount(k);
                B = (pc >= 64) ? 0L : (O << pc);
            }
        }

        long c = -3L * sum;
        if (c == 0) {
            throw new RuntimeException("Need higher 2-adic precision (c==0 mod 2^64)");
        }

        int tz = Long.numberOfTrailingZeros(c);
        return base + tz;
    }

    static long UValue(int N) {
        Mod2Pow64 mod = new Mod2Pow64();
        long[] partials = new long[16];
        Thread[] threads = new Thread[16];

        int numThreads = Runtime.getRuntime().availableProcessors();
        if (numThreads > 16)
            numThreads = 16;
        if (numThreads <= 0)
            numThreads = 1;

        int chunk = (N + numThreads - 1) / numThreads;

        for (int t = 0; t < numThreads; t++) {
            final int tid = t;
            int L = t * chunk + 1;
            int R = Math.min(N, (t + 1) * chunk);

            if (L > R)
                break;

            threads[t] = new Thread(() -> {
                long local = 0;
                for (int n = L; n <= R; n++) {
                    long x = n;
                    long cube = x * x * x;
                    local += uValue(cube, mod);
                }
                partials[tid] = local;
            });
            threads[t].start();
        }

        long total = 0;
        for (int t = 0; t < numThreads; t++) {
            if (threads[t] != null) {
                try {
                    threads[t].join();
                    total += partials[t];
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

        return total;
    }

    public static String solve() {
        return Long.toString(UValue(10000));
    }

    public static void main(String[] args) {
        System.out.println(solve());
    }
}
