#include #include #include #include #include #include #include #include #include using namespace std; namespace { constexpr uint64_t kTargetN = 1'000'000'000'000'000'000ULL; constexpr uint64_t kMod = 1'000'000'000ULL; int floor_log2_u64(uint64_t x) { int r = 0; while (x > 1) { x >>= 1; ++r; } return r; } bool pow_leq(uint64_t base, int exp, uint64_t limit) { unsigned __int128 res = 1; for (int i = 0; i < exp; ++i) { res *= base; if (res > limit) return false; } return true; } uint64_t int_nth_root(uint64_t n, int k) { if (k == 1 || n <= 1) return n; long double approx = std::pow(static_cast(n), 1.0L / k); uint64_t r = static_cast(approx); if (r < 1) r = 1; while (pow_leq(r + 1, k, n)) ++r; while (!pow_leq(r, k, n)) --r; return r; } void mobius_sieve(int nmax, vector& mu) { mu.assign(nmax + 1, 0); vector primes; primes.reserve(nmax / 2); mu[1] = 1; vector spf(nmax + 1, 0); for (int i = 2; i <= nmax; ++i) { if (spf[i] == 0) { spf[i] = i; primes.push_back(i); mu[i] = -1; } for (int p : primes) { long long v = 1LL * p * i; if (v > nmax) break; spf[static_cast(v)] = p; if (i % p == 0) { mu[static_cast(v)] = 0; break; } mu[static_cast(v)] = static_cast(-mu[i]); } } } // Count integers r in [2, M] that are not perfect powers. uint64_t power_free_count_upto(uint64_t M, const vector& mu) { if (M < 2) return 0; int L = floor_log2_u64(M); int max_d = min(L, static_cast(mu.size()) - 1); int64_t total = 0; for (int d = 1; d <= max_d; ++d) { const int8_t md = mu[d]; if (md == 0) continue; uint64_t root = int_nth_root(M, d); if (root >= 2) { total += static_cast(md) * static_cast(root - 1); } } return static_cast(total); } vector> build_coprime_table(int L, const vector& mu) { vector> F(L + 1, vector(L + 1, 0)); for (int A = 1; A <= L; ++A) { for (int B = 1; B <= L; ++B) { int m = min(A, B); int64_t total = 0; for (int d = 1; d <= m; ++d) { int8_t md = mu[d]; if (md == 0) continue; total += static_cast(md) * (A / d) * (B / d); } F[A][B] = total; } } return F; } vector build_countA(uint64_t N, const vector& mu, int L) { vector roots(L + 2, 0); for (int k = 1; k <= L; ++k) { roots[k] = int_nth_root(N, k); } roots[L + 1] = 1; vector pf(L + 2, 0); for (int k = 1; k <= L + 1; ++k) { pf[k] = power_free_count_upto(roots[k], mu); } vector countA(L + 1, 0); for (int A = 1; A <= L; ++A) { countA[A] = pf[A] - pf[A + 1]; } return countA; } uint64_t mod_mul(uint64_t a, uint64_t b, uint64_t mod) { return static_cast((static_cast(a) * b) % mod); } unsigned __int128 compute_irrational_exact(const vector& countA, const vector>& coprime, unsigned threads) { const int L = static_cast(countA.size()) - 1; if (threads <= 1 || L <= 1) { unsigned __int128 irr = 0; for (int A = 1; A <= L; ++A) { uint64_t ca = countA[A]; if (ca == 0) continue; for (int B = 1; B <= L; ++B) { uint64_t cb = countA[B]; if (cb == 0) continue; unsigned __int128 pairs = 0; if (A == B) { if (ca < 2) continue; pairs = static_cast(ca) * (ca - 1); } else { pairs = static_cast(ca) * cb; } irr += pairs * static_cast(coprime[A][B]); } } return irr; } threads = min(threads, static_cast(L)); vector partial(threads, 0); atomic nextA(1); const int chunk = 2; auto worker = [&](unsigned idx) { unsigned __int128 local = 0; while (true) { int start = nextA.fetch_add(chunk); if (start > L) break; int end = min(L, start + chunk - 1); for (int A = start; A <= end; ++A) { uint64_t ca = countA[A]; if (ca == 0) continue; for (int B = 1; B <= L; ++B) { uint64_t cb = countA[B]; if (cb == 0) continue; unsigned __int128 pairs = 0; if (A == B) { if (ca < 2) continue; pairs = static_cast(ca) * (ca - 1); } else { pairs = static_cast(ca) * cb; } local += pairs * static_cast(coprime[A][B]); } } } partial[idx] = local; }; vector pool; pool.reserve(threads); for (unsigned t = 0; t < threads; ++t) { pool.emplace_back(worker, t); } for (auto& th : pool) th.join(); unsigned __int128 total = 0; for (auto v : partial) total += v; return total; } uint64_t compute_irrational_mod(const vector& countA, const vector>& coprime, unsigned threads, uint64_t mod) { const int L = static_cast(countA.size()) - 1; if (threads == 0) threads = 1; if (threads <= 1 || L <= 1) { uint64_t irr = 0; for (int A = 1; A <= L; ++A) { uint64_t ca = countA[A]; if (ca == 0) continue; uint64_t ca_mod = ca % mod; for (int B = 1; B <= L; ++B) { uint64_t cb = countA[B]; if (cb == 0) continue; uint64_t pairs = 0; if (A == B) { if (ca < 2) continue; pairs = mod_mul(ca_mod, (ca - 1) % mod, mod); } else { pairs = mod_mul(ca_mod, cb % mod, mod); } uint64_t term = mod_mul(pairs, static_cast(coprime[A][B]) % mod, mod); irr += term; if (irr >= mod) irr %= mod; } } return irr % mod; } threads = min(threads, static_cast(L)); vector partial(threads, 0); atomic nextA(1); const int chunk = 2; auto worker = [&](unsigned idx) { uint64_t local = 0; while (true) { int start = nextA.fetch_add(chunk); if (start > L) break; int end = min(L, start + chunk - 1); for (int A = start; A <= end; ++A) { uint64_t ca = countA[A]; if (ca == 0) continue; uint64_t ca_mod = ca % mod; for (int B = 1; B <= L; ++B) { uint64_t cb = countA[B]; if (cb == 0) continue; uint64_t pairs = 0; if (A == B) { if (ca < 2) continue; pairs = mod_mul(ca_mod, (ca - 1) % mod, mod); } else { pairs = mod_mul(ca_mod, cb % mod, mod); } uint64_t term = mod_mul(pairs, static_cast(coprime[A][B]) % mod, mod); local += term; if (local >= mod) local %= mod; } } } partial[idx] = local % mod; }; vector pool; pool.reserve(threads); for (unsigned t = 0; t < threads; ++t) { pool.emplace_back(worker, t); } for (auto& th : pool) th.join(); uint64_t total = 0; for (uint64_t v : partial) { total += v; if (total >= mod) total %= mod; } return total % mod; } unsigned __int128 compute_D_exact(uint64_t N, unsigned threads) { const int L = floor_log2_u64(N); vector mu; mobius_sieve(L, mu); auto coprime = build_coprime_table(L, mu); auto countA = build_countA(N, mu, L); unsigned __int128 irrational = compute_irrational_exact(countA, coprime, threads); unsigned __int128 rational = static_cast(coprime[L][L]); return rational + irrational; } uint64_t compute_D_mod(uint64_t N, unsigned threads, uint64_t mod) { const int L = floor_log2_u64(N); vector mu; mobius_sieve(L, mu); auto coprime = build_coprime_table(L, mu); auto countA = build_countA(N, mu, L); uint64_t irrational = compute_irrational_mod(countA, coprime, threads, mod); uint64_t rational = static_cast(coprime[L][L]) % mod; return (rational + irrational) % mod; } string to_string_u128(unsigned __int128 x) { if (x == 0) return "0"; string s; while (x > 0) { int digit = static_cast(x % 10); s.push_back(static_cast('0' + digit)); x /= 10; } reverse(s.begin(), s.end()); return s; } bool run_validations(unsigned threads) { struct TestCase { uint64_t n; uint64_t expected; }; const TestCase tests[] = { {5, 13}, {10, 69}, {100, 9607}, {10000, 99959605}, }; for (const auto& tc : tests) { unsigned __int128 value = compute_D_exact(tc.n, threads); uint64_t got = static_cast(value); if (got != tc.expected) { cerr << "Validation failed: D(" << tc.n << ") = " << got << " (expected " << tc.expected << ")\n"; return false; } } cerr << "Validation checkpoints passed.\n"; return true; } } // namespace int main(int argc, char** argv) { ios::sync_with_stdio(false); cin.tie(nullptr); uint64_t N = kTargetN; unsigned threads = thread::hardware_concurrency(); if (threads == 0) threads = 1; bool validate = true; if (argc >= 2) N = stoull(argv[1]); if (argc >= 3) threads = max(1u, static_cast(stoul(argv[2]))); if (argc >= 4) validate = (stoi(argv[3]) != 0); if (validate && !run_validations(min(threads, 4u))) { return 1; } if (N == kTargetN) { uint64_t answer = compute_D_mod(N, threads, kMod); cout << setfill('0') << setw(9) << answer << "\n"; } else { unsigned __int128 exact = compute_D_exact(N, threads); cout << to_string_u128(exact) << "\n"; } return 0; }