We define D( {A 1,A 2,…A n} ) = the number of element that satisfy A i=i
For example, D( {1,2,3} ) = 3 ,D( {1,3,2} ) = 1 (only ‘1’ is at 1), D({3,1,2}) = 0 ….
Now we want to calculate the number of permutation that satisfy D( {A 1,A 2,…A n} ) = K.
For example, if n = 3 and k = 3, then of course there is only one permutation {1,2,3} that satisfy D( {1,2,3}) = 3. But if n = 3 and k = 0, then there are two permutations {3,1,2} and {2,3,1} satisfy D( {3,2,1} ) = D( {2,3,1} ) = 0;
But when n is very large, it’s hard to calculate by brute force algorithm. Optimal is one required here.
Because the answer may be very large, so just output the remainder of the answer after divided by m.
In each case, the first line contains three integers n,k and m. (0 <= k<= n <=10^9, 1 <= m <= 10^5, n != 0)
2
3 0 7
3 3 3
Case 1: 2
Case 2: 1
//
#include<iostream>
#include<cstring>
#include<cstdio>
#include<algorithm>
#include<cmath>
using namespace std;
#define F 200
#define M 200010
long long n,K,m;
long long mp[F],mr;
long long xa[F],xb[F],xc[F];
long long circle[M] = {1};
void divide_factor(long long x){
long long i,k;
k = (long long)sqrt((double)x) + 1;
mr = 0;
for(i = 2; i <= k && x != 1; i++)
if(x % i == 0){
++mr; mp[mr] = i; xc[mr] = 1;
while(x % i == 0){x /= i; xc[mr] = xc[mr]*i;}
}
if(x != 1){++mr; mp[mr] = xc[mr] = x;}
}
long long get_fct(long long x, long long y){
long long ret = 0;
while(x != 0){ret += (x/y); x /= y;}
return ret;
}
long long cnt_exp(long long x, long long y, long long z){
long long ret = 1;
while( y != 0){
if(y % 2 == 1) ret = (ret*x)%z;
x = (x*x)%z; y = y >> 1;
}
return ret;
}
long long extend_gcd(long long a, long long b, long long &x, long long &y, long long z){
long long i,tmp;
if(b == 0) {x = 1; y = 0; return a;}
i = extend_gcd(b,a%b,x,y,z);
tmp = x; x = y; y = (tmp-a/b*y)%z;
return i;
}
long long CRT(){
long long i;
long long ret = 0;
for(i = 1; i <= mr; i++){
long long x,y;
extend_gcd(m/xc[i],xc[i],x,y,xc[i]);
x = x % m;
ret = (ret + xb[i]*m/xc[i]*x)%m;
}
return (ret+m)%m;
}
long long reverse(long long a, long long b){
long long x,y;
extend_gcd(a,b,x,y,b);
return (x%b + b) % b;
}
long long cnt_c(long long nn, long long mm){//m可以分解成多个比较小的pi^k的乘积
long long i,j,k;
divide_factor(m);
for(i = 1; i <= mr; i++){
long long fct_num = get_fct(nn,mp[i])-get_fct(mm,mp[i])-get_fct(nn-mm,mp[i]);
for(j = 2,circle[1] = 1; j < xc[i]; j++)
if(j % mp[i] != 0) circle[j] = (circle[j-1]*j)%xc[i];
else circle[j] = circle[j-1];
xb[i] = 1; k = nn;
while(k != 0){
xb[i] = ( xb[i]*( (cnt_exp(circle[xc[i]-1],k/xc[i],xc[i])*circle[k-k/xc[i]*xc[i]])%xc[i] ) ) % xc[i];
k = k/mp[i];
}
for(j = 2,circle[1] = 1; j < xc[i]; j++ ){
if(j % mp[i] != 0) circle[j] = (circle[j-1]*reverse(j,xc[i]))%xc[i];
else circle[j] = circle[j-1];
}
k = mm;
while(k != 0){
xb[i] = ( xb[i]*( (cnt_exp(circle[xc[i]-1],k/xc[i],xc[i])*circle[k-k/xc[i]*xc[i]])%xc[i] ) ) % xc[i];
k = k/mp[i];
}
k = nn-mm;
while(k != 0){
xb[i] = ( xb[i]*( (cnt_exp(circle[xc[i]-1],k/xc[i],xc[i])*circle[k-k/xc[i]*xc[i]])%xc[i] ) ) % xc[i];
k = k/mp[i];
}
while(fct_num--) xb[i] = (xb[i]*mp[i])%xc[i];
}
return CRT();
}
long long cnt_pos(long long x){//h[x]%m 错排列的周期性
long long i,ret;
if(x == 0) return 1;
x = x % (2*m);
if(x == 0) x = 2*m;
for(i = 2,ret = 0; i <= x; i++) ret = (ret*i + (i%2==0?1:-1))%m;
return (ret+m)%m;
}
int main(){
int cas,k;
scanf("%d",&cas);
for(k = 1; k <= cas; k++){
scanf("%I64d%I64d%I64d",&n,&K,&m);
long long ans = cnt_c(n,K);
ans = (cnt_pos(n-K)*ans)%m;
printf("Case %d: %I64d\n",k,ans);
}
return 0;
}