思路
(以下令\(F(n)=f(n)^k\))
首先肯定要莫比乌斯反演(这里\(ans=\sum_{i=1}^n\sum_{j=1}^n F(\gcd(i,j))\)),那么可以推出:
\[ans=\sum_{T=1}^n \left\lfloor\frac n T\right\rfloor^2\sum_{d|T}F(d)\mu(T/d)
\]
可以整除分块,但后面的东西怎么办呢?
令\(G=F*\mu\)(\(*\)表示狄利克雷卷积),那么就有
\[ans=\sum_{T=1}^n \left\lfloor\frac n T\right\rfloor^2G(T)
\]
看到\(\mu\)函数有点烦,考虑用杜教筛的式子消去它。记\(S(n)=\sum_{i=1}^n G(i)\),则对任意数论函数\(g\)有
\[g(1)S(n)=\sum_{i=1}^n (F*\mu*g)(i)-\sum_{d=2}^n g(d)S\left(\left\lfloor\frac n d\right\rfloor\right)
\]
显然令\(g(n)=1\),则\((\mu*g)(n)=[n=1]\),从而\(F*\mu*g=F\),于是
\[S(n)=\sum_{i=1}^n F(i)-\sum_{d=2}^n S\left(\left\lfloor\frac n d\right\rfloor\right)
\]
\(\sum_{i=1}^n F(i)\)可以min_25筛搞出来,然后就做完了。(参见UOJ188. 【UR #13】Sanrd)
复杂度?一个\(O(\sqrt{n})\)的整除分块套上一个\(O(n^{3/4})\)的没有预处理的杜教筛,再套一个\(O(\frac{n^{3/4}}{\log n})\)的min_25筛,但它就是能过QwQ。
就当复杂度是\(O(能过)\)吧。
代码
#include<bits/stdc++.h>
// clock() reading taken at program start; chktime() prints (clock()-t)/1000.0.
clock_t t=clock();
// Shared helper toolkit: shorthand macros, stdin/stdout helpers and arithmetic
// modulo `mod`.
namespace my_std{
using namespace std;
#define pii pair<int,int>
#define fir first
#define sec second
#define MP make_pair
// rep / drep: int loops over an inclusive range, ascending / descending.
#define rep(i,x,y) for (int i=(x);i<=(y);i++)
#define drep(i,x,y) for (int i=(x);i>=(y);i--)
// go: follows the head[] / edge[].nxt chain of x; both arrays must be declared by the user.
#define go(x) for (int i=head[x];i;i=edge[i].nxt)
#define templ template<typename T>
// sz: capacity used for the file's global arrays. mod: 2^32.
#define sz 2010101
#define mod 4294967296ll
typedef long long ll;
typedef double db;
// Random engine seeded from the steady clock.
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
// Uniformly distributed integer in [l, r].
templ inline T rnd(T l,T r) {return uniform_int_distribution<T>(l,r)(rng);}
// x = max(x, y); returns true iff x was changed.
templ inline bool chkmax(T &x,T y)
{
	if (x<y) {x=y;return true;}
	return false;
}
// x = min(x, y); returns true iff x was changed.
templ inline bool chkmin(T &x,T y)
{
	if (x>y) {x=y;return true;}
	return false;
}
// Reads one number from stdin into t: skips everything up to the first digit
// (remembering whether a '-' was seen), then parses the digits and an optional
// fractional part. If the input ends before a digit is found, t is left at 0.
templ inline void read(T& t)
{
	t=0;
	char f=0;
	// getchar() returns int; keeping it as int lets EOF be told apart from data
	// (a plain char made the skip loop spin forever at end of input).
	int ch=getchar();
	double d=0.1;
	while(ch!=EOF&&(ch>'9'||ch<'0')) f|=(ch=='-'),ch=getchar();
	while(ch<='9'&&ch>='0') t=t*10+ch-48,ch=getchar();
	if(ch=='.')
	{
		ch=getchar();
		while(ch<='9'&&ch>='0') t+=d*(ch^48),d*=0.1,ch=getchar();
	}
	t=(f?-t:t);
}
// Reads several values in argument order.
template<typename T,typename... Args>inline void read(T& t,Args&... args){read(t); read(args...);}
// Output buffer (__sr, last used index __C) and digit scratch stack (__z, top __zz).
char __sr[1<<21],__z[20];int __C=-1,__zz=0;
// Writes the buffered output to stdout and empties the buffer.
inline void Ot(){fwrite(__sr,1,__C+1,stdout),__C=-1;}
// Appends x in decimal followed by '\n' to the output buffer. The buffer is only
// flushed here when it is already more than half full, so the caller still has
// to call Ot() once at the end. (`register` was dropped: it is not valid C++17.)
inline void print(int x)
{
	if(__C>1<<20)Ot();
	if(x<0)__sr[++__C]='-',x=-x;
	// Push the digits least-significant first, then pop them into the buffer.
	while(__z[++__zz]=x%10+48,x/=10);
	while(__sr[++__C]=__z[__zz],--__zz);
	__sr[++__C]='\n';
}
// Redirects stdin to "a.in" unless ONLINE_JUDGE is defined.
void file()
{
	#ifndef ONLINE_JUDGE
	freopen("a.in","r",stdin);
	#endif
}
// Prints the clock ticks elapsed since start-up divided by 1000 (local runs only).
inline void chktime()
{
	#ifndef ONLINE_JUDGE
	cout<<(clock()-t)/1000.0<<'\n';
	#endif
}
#ifdef mod
// Residues are multiplied in unsigned 64-bit arithmetic, which cannot wrap as
// long as the modulus does not exceed 2^32.
static_assert(mod>0&&mod<=4294967296ll,"ksm/inv need 0 < mod <= 2^32");
// Returns x^y modulo mod, in [0, mod). Exponents <= 0 yield 1.
// The exponent is 64-bit so that values such as mod-2 are not truncated, and
// the products are unsigned because two residues below 2^32 can multiply to
// almost 2^64, which overflows a signed long long.
ll ksm(ll x,ll y)
{
	typedef unsigned long long ull;
	const ull m=mod;
	ull base=(ull)((x%mod+mod)%mod);
	ull ret=1;
	for (;y>0;y>>=1,base=base*base%m) if (y&1) ret=ret*base%m;
	return (ll)ret;
}
// Returns the inverse of x modulo mod in [0, mod), or 0 when gcd(x, mod) != 1
// and no inverse exists. Uses the extended Euclidean algorithm, because the
// Fermat exponent mod-2 is only valid for a prime modulus and mod is 2^32.
ll inv(ll x)
{
	ll a=(x%mod+mod)%mod,b=mod;
	ll u=1,v=0; // invariant: a == u*x and b == v*x (mod mod)
	while (b)
	{
		const ll q=a/b;
		a-=q*b;swap(a,b);
		u-=q*v;swap(u,v);
	}
	if (a!=1) return 0;
	return (u%mod+mod)%mod;
}
#else
// Plain (non-modular) integer power x^y; exponents <= 0 yield 1.
ll ksm(ll x,ll y){ll ret=1;for (;y>0;y>>=1,x=x*x) if (y&1) ret=ret*x;return ret;}
#endif
// inline ll mul(ll a,ll b){ll d=(ll)(a*(double)b/mod+0.5);ll ret=a*b-d*mod;if (ret<0) ret+=mod;return ret;}
}
using namespace my_std;
// n, K: the two integers read from the input, in that order (see main()).
int n,K;
// pri[1..cnt]: the primes below sz in increasing order, filled by init().
int pri[sz],cnt;
// kpow[i] = ksm(pri[i], K) for 1 <= i <= cnt; kpow[0] is never written and stays 0.
ll kpow[sz];
// npri[v] is set for every composite v < sz that init() crosses off.
bool npri[sz];
void init()
{
#define x i*pri[j]
rep(i,2,sz-1)
{
if (!npri[i]) pri[++cnt]=i,kpow[cnt]=ksm(i,K);
for (int j=1;j<=cnt&&x<sz;j++)
{
npri[x]=1;
if (i%pri[j]==0) break;
}
}
#undef x
}
namespace SolveF
{
int Sqr;
int w[sz];
int id1[sz],id2[sz],m;
ll g[sz];
int id(int x){return x>=Sqr?id2[n/x]:id1[x];}
ll solve(int n,int j)
{
if (n<=1) return 0;
ll ret=kpow[j-1]*(g[id(n)]-(j-2))%mod;
for (int k=j;1ll*pri[k]*pri[k]<=n;k++)
for (int P=pri[k];1ll*P*pri[k]<=n;P*=pri[k])
(ret+=solve(n/P,k+1))%=mod;
return ret;
}
bool vis[sz];
ll ans[sz];
ll solve(int n){if (vis[id(n)]) return ans[id(n)];vis[id(n)]=1;return ans[id(n)]=solve(n,1)+g[id(n)];}
void init()
{
Sqr=sqrt(n);
for (int i=1,j;i<=n;i=j+1)
{
int x=n/i;j=n/x;w[++m]=x;
if (x<Sqr) id1[x]=m; else id2[j]=m;
g[m]=x-1;
}
rep(i,1,cnt) rep(N,1,m)
{
if (1ll*pri[i]*pri[i]>w[N]) break;
int x=w[N]/pri[i];
g[N]-=g[id(x)]-(i-1);
}
}
}
// Memoised evaluation of S(n) = SolveF::solve(n) - sum_{d=2..n} S(n / d),
// with the sum over d grouped into ranges that share the same quotient n / d.
namespace SolveG
{
	// Cache of computed values, keyed by the argument n.
	unordered_map<int,ll>M;

	// Returns S(n) reduced into [0, mod); S(n) is 0 for n <= 1.
	ll solve(int n)
	{
		if (n<=1) return 0;
		// Look the key up explicitly: testing M[n] for truthiness would insert
		// an entry on every miss and would recompute any value that is
		// legitimately 0 modulo mod.
		const auto hit=M.find(n);
		if (hit!=M.end()) return hit->second;
		ll ret=SolveF::solve(n);
		for (int i=2,j;i<=n;i=j+1)
		{
			const int q=n/i;
			j=n/q; // last d with n / d == q
			// Subtract (j-i+1) * S(q); the extra "- mod" keeps the subtracted
			// amount negative so that ret only grows before the reduction.
			(ret-=1ll*(j-i+1)*solve(q)%mod-mod)%=mod;
		}
		// Store the canonical non-negative residue.
		ret=(ret%mod+mod)%mod;
		return M[n]=ret;
	}
}
int main()
{
file();
read(n,K);
init();SolveF::init();
ll ans=0;
for (int l=1,r;l<=n;l=r+1)
{
r=n/(n/l);
ans=(ans+1ll*(n/l)*(n/l)%mod*(SolveG::solve(r)-SolveG::solve(l-1)+mod)%mod)%mod;
}
cout<<ans;
return 0;
}