SPOJ694 -- DISUBSTR 后缀数组求不相同的子串的个数

时间:2023-09-02 11:52:14

DISUBSTR - Distinct Substrings

 

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.

题意:求不同的子串的个数。

首先要知道,任意子串必是某一后缀的前缀。对于后缀 suffix[sa[i]],它有 len-sa[i] 个前缀,其中 lcp[i] 个与 suffix[sa[i-1]] 的前缀重复(lcp[i] 表示排名相邻的两个后缀 suffix[sa[i-1]] 与 suffix[sa[i]] 的最长公共前缀长度)。

因此对每个 i 只需累加 len-sa[i]-lcp[i],总和即为不同子串的个数。

 #include <set>
#include <map>
#include <cmath>
#include <ctime>
#include <queue>
#include <stack>
#include <cstdio>
#include <string>
#include <vector>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
typedef unsigned long long ull;
typedef long long ll;
const int inf = 0x3f3f3f3f;
// NOTE(review): the exponent of eps was lost in the source text; 1e-8 is the
// conventional choice. eps is not used by the code visible in this file.
const double eps = 1e-8;
// Capacity of the suffix-array buffers. Written as an integer expression so no
// floating-point -> int conversion is involved in an array bound.
// NOTE(review): the additive slack was lost in the source text; +10 assumed.
const int maxn = 20000 + 10;

// Shared state of the prefix-doubling suffix-array construction:
//   s     - the text being indexed
//   len   - length of s
//   sa    - sa[r] is the start index of the suffix with rank r; the empty
//           suffix (start index len) is included, so r ranges over 0..len
//   rank  - rank[i] is the current rank of the suffix starting at i
//   tmp   - scratch buffer for the ranks of the next doubling round
//   k     - number of leading characters the current ranks already reflect
// `rank` is always referenced as `::rank` inside the functions below because,
// with `using namespace std;` in effect, the unqualified name is ambiguous
// with std::rank (<type_traits>) in C++11 and later.
int sa[maxn], k, len, tmp[maxn], rank[maxn];
std::string s;

/**
 * Strict-weak ordering used while building the suffix array.
 *
 * Compares the suffixes starting at i and j by the pair
 * (rank of the first k characters, rank of the following k characters).
 * A suffix that has no second half (start + k > len) gets -1 for that half,
 * which sorts before every real rank.
 *
 * @return true if suffix i orders strictly before suffix j on that pair.
 */
bool cmp(int i, int j)
{
    if (::rank[i] != ::rank[j])
        return ::rank[i] < ::rank[j];

    const int x = (i + k <= len ? ::rank[i + k] : -1);
    const int y = (j + k <= len ? ::rank[j + k] : -1);
    return x < y;
}

/**
 * Builds the suffix array of the global string s (length len) into sa[0..len]
 * by prefix doubling; each round sorts with std::sort.
 *
 * On return sa[0] == len (the empty suffix) and rank[sa[r]] == r for every
 * r in 0..len whenever len >= 1.
 */
void build_sa()
{
    // Round 0: order by the first character. The empty suffix gets -1 so it
    // sorts first; characters are taken as unsigned so that a negative `char`
    // can never collide with that sentinel.
    for (int i = 0; i <= len; i++)
    {
        sa[i] = i;
        ::rank[i] = (i < len ? static_cast<unsigned char>(s[i]) : -1);
    }

    // Each round doubles the compared prefix length from k to 2k.
    for (k = 1; k <= len; k *= 2)
    {
        std::sort(sa, sa + len + 1, cmp);

        // Re-rank: neighbours that compare equal under cmp share a rank.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++)
            tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) ? 1 : 0);

        for (int i = 0; i <= len; i++)
            ::rank[i] = tmp[i];
    }
}

// lcp[r] is the length of the longest common prefix of the suffixes ranked
// r-1 and r (i.e. starting at sa[r-1] and sa[r]); lcp[0] is defined as 0.
int lcp[maxn];

/**
 * Fills lcp[0..len] from s and sa in O(len) total character comparisons.
 *
 * Requires build_sa() to have been called for the current s / len.
 */
void get_lcp()
{
    // Rebuild the inverse permutation for every rank, including the last one,
    // so this function does not depend on what build_sa left in rank[].
    for (int i = 0; i <= len; i++)
        ::rank[sa[i]] = i;

    int h = 0;
    lcp[0] = 0;
    for (int i = 0; i < len; i++)
    {
        // rank[i] >= 1 here because the empty suffix always holds rank 0,
        // so the predecessor in suffix-array order exists.
        const int j = sa[::rank[i] - 1];

        // Dropping the first character shortens the common prefix by at most
        // one, so matching resumes from h - 1 instead of from 0.
        if (h > 0)
            h--;
        while (h + i < len && h + j < len && s[i + h] == s[j + h])
            h++;

        lcp[::rank[i]] = h;
    }
}
/**
 * Entry point: reads T, then T strings, and for each string prints the number
 * of distinct substrings on its own line.
 *
 * Every substring is a prefix of exactly one or more suffixes. Walking the
 * suffixes in sorted order, the suffix at sa[i] contributes len - sa[i]
 * prefixes, of which lcp[i] were already counted for the previous suffix.
 */
int main()
{
#ifndef ONLINE_JUDGE
    freopen("in.txt","r",stdin);
#endif
    int T;
    // Stop quietly on missing/malformed input instead of looping on an
    // uninitialised T.
    if (scanf ("%d", &T) != 1)
        return 0;

    while (T--)
    {
        // A failed read would leave s unchanged; bail out rather than
        // re-processing the previous test case.
        if (!(std::cin >> s))
            break;
        len = static_cast<int>(s.size());

        build_sa();
        get_lcp();

        // i == 0 is the empty suffix (sa[0] == len, lcp[0] == 0) and adds 0.
        int ans = 0;
        for (int i = 0; i <= len; i++)
            ans += len - sa[i] - lcp[i];

        printf ("%d\n",ans);
    }
    return 0;
}