public class Euler798 {
    static final int MOD = 1000000007;

    static int addmod(int a, int b) {
        int s = a + b;
        if (s >= MOD)
            s -= MOD;
        return s;
    }

    static int submod(int a, int b) {
        int s = a - b;
        if (s < 0)
            s += MOD;
        return s;
    }

    static long modPow(long a, long e) {
        long r = 1;
        a %= MOD;
        while (e > 0) {
            if ((e & 1L) == 1)
                r = (r * a) % MOD;
            a = (a * a) % MOD;
            e >>= 1L;
        }
        return r;
    }

    static int nextPow2(int x) {
        int p = 1;
        while (p < x)
            p <<= 1;
        return p;
    }

    static int[] computeDistributionOneSuit(int n, int L, boolean doChecks) {
        int[] arr = new int[L];
        if (n <= 0)
            return arr;
        if (n == 1) {
            arr[0] = 2;
            return arr;
        }

        int maxInv = n;
        long[] inv = new long[maxInv + 1];
        inv[1] = 1;
        for (int i = 2; i <= maxInv; i++) {
            inv[i] = MOD - (MOD / i) * inv[MOD % i] % MOD;
        }

        long inv2 = (MOD + 1) / 2;
        long pow2N = modPow(2, n - 2);
        arr[0] = (int) ((pow2N + 2) % MOD);

        long sumDist = 0;
        if (doChecks)
            sumDist = arr[0];

        long B = 1;
        long T = 1;
        int mMax = (n - 1) / 2;

        for (int m = 0; m <= mMax; m++) {
            long denomA = (long) n - 1 - 2L * m;
            long A = 0;
            if (m >= 1) {
                if (denomA == 0)
                    A = 1;
                else
                    A = B * m % MOD * inv[(int) denomA] % MOD;
            }

            long P = (B + A) % MOD;
            long Q = P * (n - 2L * m - 1) % MOD * inv[m + 1] % MOD;

            int godd = 2 * m + 1;
            if (godd < n) {
                long val = (pow2N + Q - T) % MOD;
                if (val < 0)
                    val += MOD;
                arr[godd] = (int) val;
                if (doChecks)
                    sumDist += val;
            }

            int geven = 2 * m;
            if (m >= 1 && geven < n) {
                long val = (pow2N + P + B - T) % MOD;
                if (val < 0)
                    val += MOD;
                arr[geven] = (int) val;
                if (doChecks)
                    sumDist += val;
            }

            if (m == mMax)
                break;

            long num1 = (long) n - 2 - 2L * m;
            long num2 = (long) n - 3 - 2L * m;
            long den1 = m + 1;
            long den2 = n - 2 - m;

            B = B * (num1 % MOD) % MOD;
            B = B * (num2 % MOD) % MOD;
            B = B * inv[(int) den1] % MOD;
            B = B * inv[(int) den2] % MOD;

            pow2N = pow2N * inv2 % MOD;

            int m1 = m + 1;
            long denomANext = (long) n - 1 - 2L * m1;
            long ANext = 0;
            if (denomANext == 0)
                ANext = 1;
            else
                ANext = B * m1 % MOD * inv[(int) denomANext] % MOD;

            T = (T + ANext) % MOD * inv2 % MOD;
            T = (T + B) % MOD;
        }

        if (doChecks) {
            sumDist %= MOD;
            long want = modPow(2, n);
            if (sumDist != want) {
                throw new RuntimeException("CHECK FAILED sumDist");
            }
            if (arr[n - 1] != 1) {
                throw new RuntimeException("CHECK FAILED arr[start_n-1]");
            }
        }

        return arr;
    }

    static void walshHadamardXor(int[] a) {
        int n = a.length;
        int numThreads = Runtime.getRuntime().availableProcessors();
        if (numThreads <= 0)
            numThreads = 1;

        for (int len = 1; len < n; len <<= 1) {
            int step = len << 1;
            int blocks = n / step;
            if (numThreads > 1 && blocks >= 2048) {
                int tcnt = Math.min(numThreads, blocks);
                Thread[] threads = new Thread[tcnt];
                int per = (blocks + tcnt - 1) / tcnt;

                for (int t = 0; t < tcnt; t++) {
                    final int startBlock = t * per;
                    final int endBlock = Math.min(blocks, startBlock + per);
                    final int currentLen = len;
                    final int currentStep = step;

                    threads[t] = new Thread(() -> {
                        for (int b = startBlock; b < endBlock; b++) {
                            int i = b * currentStep;
                            for (int j = 0; j < currentLen; j++) {
                                int u = a[i + j];
                                int v = a[i + j + currentLen];
                                a[i + j] = addmod(u, v);
                                a[i + j + currentLen] = submod(u, v);
                            }
                        }
                    });
                    threads[t].start();
                }
                for (int t = 0; t < tcnt; t++) {
                    try {
                        threads[t].join();
                    } catch (Exception e) {
                    }
                }
            } else {
                for (int i = 0; i < n; i += step) {
                    for (int j = 0; j < len; j++) {
                        int u = a[i + j];
                        int v = a[i + j + len];
                        a[i + j] = addmod(u, v);
                        a[i + j + len] = submod(u, v);
                    }
                }
            }
        }
    }

    static int solveC(int n, int s, boolean doChecks) {
        int L = nextPow2(n);
        int[] A = computeDistributionOneSuit(n, L, doChecks);

        walshHadamardXor(A);

        int numThreads = Runtime.getRuntime().availableProcessors();
        if (numThreads <= 0)
            numThreads = 1;

        long[] partials = new long[numThreads];
        Thread[] threads = new Thread[numThreads];
        int per = (L + numThreads - 1) / numThreads;

        for (int t = 0; t < numThreads; t++) {
            final int tid = t;
            final int start = t * per;
            final int end = Math.min(L, start + per);
            if (start >= end)
                continue;

            threads[t] = new Thread(() -> {
                long sum = 0;
                for (int i = start; i < end; i++) {
                    if (A[i] != 0) {
                        sum += modPow(A[i], s);
                    }
                }
                partials[tid] = sum % MOD;
            });
            threads[t].start();
        }

        long total = 0;
        for (int t = 0; t < numThreads; t++) {
            if (threads[t] != null) {
                try {
                    threads[t].join();
                    total += partials[t];
                } catch (Exception e) {
                }
            }
        }
        total %= MOD;

        long invL = modPow(L, MOD - 2);
        return (int) ((total * invL) % MOD);
    }

    public static String solve() {
        return Integer.toString(solveC(10000000, 10000000, false));
    }

    public static void main(String[] args) {
        System.out.println(solve());
    }
}
