字符串(后缀数组):POJ 3415 Common Substrings

时间:2023-03-09 09:17:21
字符串(后缀数组):POJ 3415 Common Substrings
Common Substrings

Description

A substring of a string T is defined as:

$T(i, k) = T_i T_{i+1} \ldots T_{i+k-1}$, $1 \le i \le i+k-1 \le |T|$.

Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

$S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}$.

You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

$1 \le |A|, |B| \le 10^5$
$1 \le K \le \min\{|A|, |B|\}$
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2
aababaa
abaabaa
1
xx
xx
0

Sample Output

22
5
  这道题呃,有些考验程序实践能力。
  题意:对于给定的两个字符串和一个整数K,求两个字符串长度大于等于K的公共子串数目。
  将两个字符串接起来,中间用一个特殊字符隔开,枚举Lcp,暴力枚举是O(n³)的,死活都不可能过。
  这时我们想:能否利用之前枚举过的信息?所以正解就出来了:单调栈优化!
  具体咋打就看代码吧~~~
 #include <iostream>
#include <cstring>
#include <cstdio>
using namespace std;
// POJ 3415 "Common Substrings": count the triples (i, j, k) with k >= K and
// A(i, k) == B(j, k).
//
// Approach: build the suffix array of A + '%' + B (plus a 0 sentinel), compute
// the LCP of adjacent suffixes, then sweep the suffix array twice with a
// monotonic stack so that every (A-suffix, B-suffix) pair is counted once.

// |A| + |B| <= 2 * 10^5, plus the separator, the sentinel and a little slack.
const int maxn=200010;

char S[maxn];                                // A + '%' + B
// sa[i]  : start position of the suffix with rank i
// r[i]   : the text as integers, r[len] = 0 is the unique smallest sentinel
// rnk[p] : rank of the suffix starting at p (named `rnk` rather than `rank`
//          so it cannot clash with std::rank under `using namespace std;`)
// lcp[i] : longest common prefix of suffixes sa[i-1] and sa[i]
int sa[maxn],r[maxn],rnk[maxn],lcp[maxn];
// Scratch arrays for the radix sorts inside DA(); len is the text length
// without the sentinel.
int Wv[maxn],Ws[maxn],Wa[maxn],Wb[maxn],len;

// True when positions a and b have equal (first key, second key) pairs in the
// previous round's rank array p, the second key being l positions further on.
bool cmp(int *p,int a,int b,int l){
    return p[a]==p[b]&&p[a+l]==p[b+l];
}

// Builds sa[0..n-1] for r[0..n-1] by prefix doubling with counting sorts.
// m is an exclusive upper bound on the values stored in r.
void DA(int n,int m){
    int i,j,p,*x=Wa,*y=Wb,*t;

    // Round 0: counting sort on single characters.
    for(i=0;i<m;i++)Ws[i]=0;
    for(i=0;i<n;i++)++Ws[x[i]=r[i]];
    for(i=1;i<m;i++)Ws[i]+=Ws[i-1];
    for(i=n-1;i>=0;i--)sa[--Ws[x[i]]]=i;

    // Each round doubles the compared prefix length j; stop once all ranks
    // are distinct (p == n).
    for(j=1,p=1;p<n;m=p,j<<=1){
        // y = suffixes ordered by second key: those with no second half
        // come first, the rest follow in the order given by the previous sa.
        for(p=0,i=n-j;i<n;i++)y[p++]=i;
        for(i=0;i<n;i++)
            if(sa[i]>=j)
                y[p++]=sa[i]-j;

        // Stable counting sort of y by first key.
        for(i=0;i<m;i++)Ws[i]=0;
        for(i=0;i<n;i++)++Ws[Wv[i]=x[y[i]]];
        for(i=1;i<m;i++)Ws[i]+=Ws[i-1];
        for(i=n-1;i>=0;i--)
            sa[--Ws[Wv[i]]]=y[i];

        // Swap buffers and assign new ranks; equal key pairs share a rank.
        for(t=x,x=y,y=t,i=1,p=1,x[sa[0]]=0;i<n;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],j)?p-1:p++;
    }
}

// Fills rnk[] and lcp[1..n] for a text of length n (sentinel excluded).
// The previous match length k is carried over and decremented by at most one
// per text position, so the total work is linear.
void Lcp(int n){
    int i,j,k=0;
    for(i=1;i<=n;i++)rnk[sa[i]]=i;
    for(i=0;i<n;lcp[rnk[i++]]=k)
        for(k?--k:k,j=sa[rnk[i]-1];r[i+k]==r[j+k];++k);
}

// Monotonic stack: s[t][0] = LCP value, s[t][1] = number of stacked suffixes
// currently clamped to that value.
int s[maxn][2];

// One sweep over the suffix array. n is |A|, so positions < n belong to A and
// positions > n belong to B. With stackB == true, B-suffixes are pushed and
// A-suffixes query; with stackB == false the roles are swapped.
// Returns the sum of (min LCP - k + 1) over all (stacked, querying) pairs in
// which the stacked suffix ranks lower and the min LCP is at least k.
static long long sweep(int n,int k,bool stackB){
    int cnt=0;
    long long total=0,sum=0;
    for(int i=1;i<=len;i++){
        // An LCP below k breaks every chain of common substrings here.
        if(lcp[i]<k){
            sum=0;cnt=0;
            continue;
        }
        int tot=0;
        const bool prevStacked=stackB?sa[i-1]>n:sa[i-1]<n;
        if(prevStacked){
            sum+=lcp[i]-k+1;
            tot++;
        }
        // Entries with a larger LCP are clamped down to lcp[i]; remove the
        // excess they had contributed to sum and merge their counts.
        while(cnt&&s[cnt][0]>=lcp[i]){
            tot+=s[cnt][1];
            sum-=1ll*s[cnt][1]*(s[cnt][0]-lcp[i]);
            cnt--;
        }
        s[++cnt][0]=lcp[i];
        s[cnt][1]=tot;
        const bool curQueries=stackB?sa[i]<n:sa[i]>n;
        if(curQueries)total+=sum;
    }
    return total;
}

// Reads blocks "K / A / B" until K == 0 (or input ends) and prints |S| for each.
int main(){
    int n,k;
    // scanf returns the number of items read; checking ==1 also stops on
    // malformed input instead of looping on a stale k.
    while(scanf("%d",&k)==1&&k>0){
        // Field widths cap each string at the 10^5 limit from the statement.
        if(scanf("%100000s",S)!=1)break;
        n=(int)strlen(S);S[n]='%';           // '%' sorts below every letter
        if(scanf("%100000s",S+n+1)!=1)break;
        len=(int)strlen(S);
        for(int i=0;i<len;i++)
            r[i]=S[i];
        r[len]=0;
        DA(len+1,128);
        Lcp(len);

        // Pass 1 pairs each A-suffix with lower-ranked B-suffixes; pass 2
        // covers the opposite order, so each pair is counted exactly once.
        long long ans=sweep(n,k,true)+sweep(n,k,false);
        printf("%lld\n",ans);
    }
    return 0;
}