POJ-3294 Life Forms n个字符串中出现超过n/2次的最长子串(按字典序依次输出)

时间:2024-12-13 23:07:45

按照以前两个字符串找两者的最长公共子串的思路类似,可以把所有串拼接到一起,这里为了避免讨论LCP跨越多个串需需要特别处理的问题用不同的字符把所有串隔开(因为char只有128位,和可能不够用,更推荐设置成某一特殊字符,在后缀数组初始化的时候在对其映射的int值做处理)。二分长度然后遍历Height,判断SA是否处于同一个串。这里我一开始觉得判断不同串的计数有点麻烦,其实根据LCP确定起点,随终点延伸Height值不会上升的特性,一旦Height小于我们要求的值就切换到下一计数(因为必然不可能再和之前计数的串再产生重合了)。

#include <iostream>
#include <cstring>
#include <string>
#include <cmath>
#include <vector>
#include <algorithm>
#include <cstdio>
#define debug puts("debug")
#define LL long long
using namespace std; const int N=;
string s,temp;
int nn;
int pos[N];
int vis[N];
int phs[N];
int endp[N];
int stx[N],sp; class SF
{
//N:数组大小
public:
int x[N], y[N], c[N];
int Height[N], str[N], SA[N], Rank[N];//Height数组从2开始,SA记录Rank=i的下标
int slen;
int m;//字符集处理大小(传入如果不是数字,需要做位移转换)
bool cmp(int* r, int a, int b, int l)
{
return r[a] == r[b] && r[a + l] == r[b + l];
}
void Suffix(int n)
{
++n;
int i, j, p;
for (i = ; i < m; ++i) c[i] = ;
for (i = ; i < n; ++i) c[x[i] = str[i]]++;
for (i = ; i < m; ++i) c[i] += c[i - ];
for (i = n - ; i >= ; --i) SA[--c[x[i]]] = i; for (j = ; j <= n; j <<= )
{
p = ;
for (i = n - j; i < n; ++i) y[p++] = i;
for (i = ; i < n; ++i) if (SA[i] >= j) y[p++] = SA[i] - j;
for (i = ; i < m; ++i) c[i] = ;
for (i = ; i < n; ++i) c[x[y[i]]]++; for (i = ; i < m; ++i) c[i] += c[i - ];
for (i = n - ; i >= ; --i) SA[--c[x[y[i]]]] = y[i]; swap(x, y);
p = ;
x[SA[]] = ;
for (i = ; i < n; ++i)
{
x[SA[i]] = cmp(y, SA[i - ], SA[i], j) ? p - : p++;
}
if (p >= n)break;
m = p;
} int k = ;
n--;
for (i = ; i <= n; ++i) Rank[SA[i]] = i; for (i = ; i < n; ++i)
{
if (k)--k;
j = SA[Rank[i] - ];
while (str[i + k] == str[j + k])++k;
Height[Rank[i]] = k;
//cout << k << endl;
}
}
static const int bitlen = ;
LL lg2(LL p)//计算log2(n)
{
return (LL)(log(p) / log());
}
LL dp[bitlen][N];
LL bit[bitlen];
void initRMQ()//初始化
{
bit[] = ;
for (int i = ; i < bitlen; i++) bit[i] = * bit[i - ];
for (int i = ; i <= slen; i++)
dp[][i] = Height[i];
dp[][] = dp[][] = ;
for (LL i = ; bit[i] < slen + ; i++)
for (LL j = ; j + bit[i] <= slen + ; j++)
dp[i][j] = min(dp[i - ][j], dp[i - ][j + bit[i - ]]);
}
LL query(LL l, LL r)//查询两个Rank之间的lcp
{
if (r == l) return slen - SA[l];
if (l > r) swap(l, r);
l++;
LL mig = lg2(r - l + 1.0);
return min(dp[mig][l], dp[mig][r - bit[mig] + ]);
}
void init(string s)
{
slen = s.size();
m=;//每次都需要初始化m
int fuck=;
for (int i = ; i < slen; i++)
{
if(s[i]!='#')
str[i] = s[i] - 'a' + ;//如果是字符,映射成从1开始的序列
else str[i]=+(fuck++);
}
str[slen] = ;//1作为结束符,防止越界
Suffix(slen); initRMQ();
}
bool ok(int lx)
{
sp=;
fill(vis,vis+slen+,);
int cnt=;
vis[phs[SA[]]]=;
//cout<<phs[1]<<endl;
for(int i=; i<=slen; i++)
{
//cout<<Height[i]<<endl;
//cout<<phs[i]<<endl;
if(Height[i]>=lx)
{
if(vis[phs[SA[i]]]==)
cnt++,vis[phs[SA[i]]]=;
}
else
{
if(cnt>nn/)
stx[sp++]=SA[i-];
cnt=;
fill(vis,vis+nn+,);
vis[phs[SA[i]]]=;
}
}
if(cnt>nn/)
stx[sp++]=SA[slen-];
return sp;
}
int bins(int l,int r)
{
while(r-l>=)
{
int mid=(l+r)/;
if(ok(mid))l=mid;
else r=mid-;
}
//cout<<l<<' '<<r<<endl;
for(; r>=l; r--)
{
//cout<<r<<endl;
if(ok(r))return r;
}
return ;
} void solve()
{
int f=bins(,slen);
if(!f)cout<<"?"<<endl;
else
{
//sort(stx,stx+f,cmp);
for(int i=; i<sp; i++)
{
//cout<<stx[i]<<endl;
for(int j=; j<f; j++)
cout<<s[stx[i]+j];
cout<<endl;
}
}
}
} sf;
int main()
{
//freopen("in.txt","r",stdin);
//freopen("out.txt","w",stdout);
cin.sync_with_stdio(false);
int cnt=;
while(cin>>nn)
{
if(!nn)break;
if(cnt++)cout<<endl;
s=""; for(int i=; i<nn; i++)
{
cin>>temp;
pos[i]=temp.length();
s+=temp;
s+='#';
}
if(nn==)
{
s=s.substr(,s.length()-);
cout<<s<<endl;
continue;
}
int p=;
for(int i=; i<nn; i++)
{
for(int j=; j<pos[i]; j++,p++)
phs[p]=i;
phs[p++]=nn;
if(i==)endp[i]=pos[i];
else endp[i]=endp[i-]+pos[i]+;
}
sf.init(s); sf.solve();
//sf.ok(1);
}
return ;
}