关于后缀自动机的总结

时间:2022-05-08 09:45:04

学习请看clj冬令营的讲稿吧,这主要是从SAM讲的

另一个可以看http://fanhq666.blog.163.com/blog/static/8194342620123352232937/,主要是从后缀树方面讲的,殊途同归

不过个人感觉从后缀树的方面更容易理解……

这里简单的说一下SAM的几个重要的性质

1.在SAM上,起点到任意一点的所有路径无重复的识别了所有子串

2.每个子串s出现的次数即ST(s)的right集合的大小(即子串s出现的所有右端点r的集合)

具体的就是parent树上子树right集合的并

3.(这是我认为最美妙的性质)SAM所构建出来的parent树与对原串逆序所购建出来的后缀树节点一一对应

具体的,SAM上每个节点最长接受子串=根节点到后缀树上每个点路径对应的字符串的逆序

4. 自动机的go[state][char]在parent树(后缀树)上的含义为(逆序)后缀树state节点代表的后缀前面加一个字符char的串所对应的节点

看图,(吐槽一下fhq博客的回文串)

关于后缀自动机的总结关于后缀自动机的总结

一目了然了吧,正是有SAM在构造的过程中把一棵后缀树构造出来了,所以后缀数组能做的题目基本上SAM也能做

而且很多跟子串有关的题目,运用SAM明显比较简单

下面随便扯几个题目

bzoj3998 运用性质2,然后在拓扑图上维护一个前缀和即可

关于后缀自动机的总结关于后缀自动机的总结
  1 var mx,sa,fa:array[0..1000010] of longint;
  2     sum,w:array[0..1000010] of int64;
  3     go:array[0..1000010,'a'..'z'] of longint;
  4     i,n,ty,k,x,j,t,last:longint;
  5     s:ansistring;
  6     c:char;
  7 
  8 procedure add(c:char);
  9   var p,np,nq,q:longint;
 10   begin
 11     inc(t);  p:=last;
 12     last:=t; np:=t; w[np]:=1;
 13     mx[np]:=mx[p]+1;
 14     while (p<>0) and (go[p,c]=0) do
 15     begin
 16       go[p,c]:=np;
 17       p:=fa[p];
 18     end;
 19     if p=0 then fa[np]:=1
 20     else begin
 21       q:=go[p,c];
 22       if mx[p]+1=mx[q] then fa[np]:=q
 23       else begin
 24         inc(t); nq:=t;
 25         mx[nq]:=mx[p]+1;
 26         go[nq]:=go[q];
 27         fa[nq]:=fa[q];
 28         fa[q]:=nq; fa[np]:=nq;
 29         while go[p,c]=q do
 30         begin
 31           go[p,c]:=nq;
 32           p:=fa[p];
 33         end;
 34       end;
 35     end;
 36   end;
 37 
 38 procedure pre;
 39   var c:char;
 40   begin
 41     for i:=1 to t do
 42       inc(sum[mx[i]]);
 43     for i:=1 to n do
 44       inc(sum[i],sum[i-1]);
 45     for i:=t downto 1 do
 46     begin
 47       sa[sum[mx[i]]]:=i;
 48       dec(sum[mx[i]]);
 49     end;
 50     fillchar(sum,sizeof(sum),0);
 51     for i:=t downto 1 do
 52     begin
 53       x:=sa[i];
 54       if ty=1 then w[fa[x]]:=w[fa[x]]+w[x]
 55       else w[x]:=1;
 56     end;
 57     w[1]:=0;
 58     for i:=t downto 1 do
 59     begin
 60       x:=sa[i];
 61       sum[x]:=w[x];
 62       for c:='a' to 'z' do
 63         sum[x]:=sum[x]+sum[go[x,c]];
 64     end;
 65   end;
 66 
 67 procedure get(x,k:longint);
 68   var c:char;
 69       y:longint;
 70   begin
 71     while true do
 72     begin
 73       if k<=w[x] then exit;
 74       k:=k-w[x];
 75       for c:='a' to 'z' do
 76       begin
 77         y:=go[x,c];
 78         if y<>0 then
 79         begin
 80           if k<=sum[y] then
 81           begin
 82             write(c);
 83             x:=y;
 84             break;
 85           end;
 86           k:=k-sum[y];
 87         end;
 88       end;
 89     end;
 90   end;
 91 
 92 begin
 93   readln(s);
 94   n:=length(s);
 95   last:=1; t:=1;
 96   for i:=1 to n do
 97     add(s[i]);
 98   readln(ty,k);
 99   pre;
100   if k>sum[1] then writeln(-1)
101   else get(1,k);
102 end.
3998

 

bzoj3676 先用manacher找出所有本质不同回文串,然后放到SAM上跑即可

怎么跑?当然不是一个个匹配啦,把一个子串当作某个后缀的前缀,然后找到后缀对应的节点

然后倍增往上提~

(听说有个东西叫回文自动机?……)

关于后缀自动机的总结关于后缀自动机的总结
  1 var go:array[0..600010,'a'..'z'] of longint;
  2     sa,sum,d,fa,mx:array[0..600010] of longint;
  3     p,loc:array[0..310000] of longint;
  4     anc:array[0..600010,0..18] of longint;
  5     w:array[0..600010] of int64;
  6     s:array[0..310000] of char;
  7     last,t,n:longint;
  8     ans:int64;
  9 
 10 function min(a,b:longint):longint;
 11   begin
 12     if a>b then exit(b) else exit(a);
 13   end;
 14 
 15 function max(a,b:int64):int64;
 16   begin
 17     if a>b then exit(a) else exit(b);
 18   end;
 19 
 20 procedure add(c:char);
 21   var p,q,np,nq:longint;
 22   begin
 23     p:=last;
 24     inc(t); last:=t; np:=t;
 25     w[np]:=1; mx[np]:=mx[p]+1;
 26     while (p<>0) and (go[p,c]=0) do
 27     begin
 28       go[p,c]:=np;
 29       p:=fa[p];
 30     end;
 31     if p=0 then fa[np]:=1
 32     else begin
 33       q:=go[p,c];
 34       if mx[q]=mx[p]+1 then fa[np]:=q
 35       else begin
 36         inc(t); nq:=t;
 37         mx[nq]:=mx[p]+1;
 38         go[nq]:=go[q];
 39         fa[nq]:=fa[q];
 40         fa[q]:=nq; fa[np]:=nq;
 41         while go[p,c]=q do
 42         begin
 43           go[p,c]:=nq;
 44           p:=fa[p];
 45         end;
 46       end;
 47     end;
 48   end;
 49 
 50 procedure prework;
 51   var x,y,i,j:longint; c:char;
 52   begin
 53     for i:=1 to t do
 54       inc(sum[mx[i]]);
 55     for i:=1 to n do
 56       inc(sum[i],sum[i-1]);
 57     for i:=t downto 1 do
 58     begin
 59       sa[sum[mx[i]]]:=i;
 60       dec(sum[mx[i]]);
 61     end;
 62     for i:=t downto 1 do
 63     begin
 64       x:=sa[i];
 65       w[fa[x]]:=w[fa[x]]+w[x];
 66     end;
 67     for i:=1 to t do
 68     begin
 69       x:=sa[i];
 70       d[x]:=d[fa[x]]+1;
 71       anc[x,0]:=fa[x];
 72       for j:=1 to 18 do
 73       begin
 74         y:=anc[x,j-1];
 75         if anc[y,j-1]<>0 then anc[x,j]:=anc[y,j-1] else break;
 76       end;
 77     end;
 78   end;
 79 
 80 procedure getchar;
 81   var ch:char;
 82   begin
 83     read(ch);
 84     while (ch>='a') and (ch<='z') do
 85     begin
 86       inc(n);
 87       s[n]:=ch;
 88       add(ch);
 89       loc[n]:=last;
 90       read(ch);
 91     end;
 92   end;
 93 
 94 procedure find(l,r:longint);
 95   var x,i:longint;
 96   begin
 97     x:=loc[r];
 98     for i:=18 downto 0 do
 99       if mx[anc[x,i]]>=r-l+1 then x:=anc[x,i];
100     ans:=max(ans,int64(r-l+1)*w[x]);
101   end;
102 
103 procedure manacher;
104   var i,k,right:longint;
105   begin
106     right:=0; k:=0;
107     for i:=1 to n do
108     begin
109       if right>i then p[i]:=min(right-i,p[2*k-i])
110       else begin
111         p[i]:=1;
112         find(i-p[i]+1,i+p[i]-1);
113       end;
114       while s[i+p[i]]=s[i-p[i]] do
115       begin
116         inc(p[i]);
117         find(i-p[i]+1,i+p[i]-1);
118       end;
119       if p[i]+i>right then
120       begin
121         k:=i;
122         right:=p[i]+i;
123       end;
124     end;
125     right:=0; k:=0;
126     for i:=1 to n do
127     begin
128       if right>i then p[i]:=min(right-i,p[2*k-i-1])
129       else p[i]:=0;
130       while s[i+p[i]+1]=s[i-p[i]] do
131       begin
132         inc(p[i]);
133         find(i-p[i]+1,i+p[i]);
134       end;
135       if p[i]+i>right then
136       begin
137         k:=i;
138         right:=p[i]+i;
139       end;
140     end;
141   end;
142 
143 begin
144   last:=1; t:=1;
145   getchar;
146   s[0]:='#'; s[n+1]:='$';
147   prework;
148   manacher;
149   writeln(ans);
150 end.
3676

 

bzoj1396 2865(注意2865数据规模大了,SAM注意卡空间)

注意出现一次的子串s ST(s)一定是parent树上的叶子节点

clj ppt中有一个性质,一个节点p可接受的子串长度=[min(s),max(s)]=[max(fa[p])+1,max(s)]

所以我们反序构造SAM,叶子节点代表了一个后缀i

这样我们就可以更新,对于[i,i+max(fa[s])+1] 我们用max(fa[s])+1更新

对于j属于[i+max(fa[s])+2,n]我们用j-i更新

相当于一个线段覆盖问题,只不过一种线段斜率为0,一种线段斜率为1,我们可以分别用线段树维护

关于后缀自动机的总结关于后缀自动机的总结
  1 const inf=100000007;
  2 var go:array[0..1000010,'a'..'z'] of longint;
  3     mx,fa,w:array[0..1000010] of longint;
  4     v:array[0..1000010] of boolean;
  5     tree:array[0..500010*3,0..1] of longint;
  6     i,n,t,last:longint;
  7     s:ansistring;
  8 
  9 function min(a,b:longint):longint;
 10   begin
 11     if a>b then exit(b) else exit(a);
 12   end;
 13 
 14 procedure add(c:char);
 15   var p,q,np,nq:longint;
 16   begin
 17     p:=last;
 18     inc(t); last:=t; np:=t;
 19     mx[np]:=mx[p]+1;
 20     while (p<>0) and (go[p,c]=0) do
 21     begin
 22       go[p,c]:=np;
 23       p:=fa[p];
 24     end;
 25     if p=0 then fa[np]:=1
 26     else begin
 27       q:=go[p,c];
 28       if mx[q]=mx[p]+1 then fa[np]:=q
 29       else begin
 30         inc(t); nq:=t;
 31         mx[nq]:=mx[p]+1;
 32         go[nq]:=go[q];
 33         fa[nq]:=fa[q];
 34         fa[q]:=nq; fa[np]:=nq;
 35         while go[p,c]=q do
 36         begin
 37           go[p,c]:=nq;
 38           p:=fa[p];
 39         end;
 40       end;
 41     end;
 42   end;
 43 
 44 procedure build(i,l,r:longint);
 45   var m:longint;
 46   begin
 47     tree[i,0]:=inf;
 48     tree[i,1]:=inf;
 49     if l<>r then
 50     begin
 51       m:=(l+r) shr 1;
 52       build(i*2,l,m);
 53       build(i*2+1,m+1,r);
 54     end;
 55   end;
 56 
 57 procedure cov(i,q,z:longint);
 58   begin
 59     tree[i,q]:=min(tree[i,q],z);
 60   end;
 61 
 62 procedure push(i,q:longint);
 63   begin
 64     cov(i*2,q,tree[i,q]);
 65     cov(i*2+1,q,tree[i,q]);
 66     tree[i,q]:=inf;
 67   end;
 68 
 69 procedure work(i,l,r,q,x,y,z:longint);
 70   var m:longint;
 71   begin
 72     if (x<=l) and (y>=r) then cov(i,q,z)
 73     else begin
 74       if tree[i,q]<inf then push(i,q);
 75       m:=(l+r) shr 1;
 76       if x<=m then work(i*2,l,m,q,x,y,z);
 77       if y>m then work(i*2+1,m+1,r,q,x,y,z);
 78     end;
 79   end;
 80 
 81 procedure ask(i,l,r:longint);
 82   var m:longint;
 83   begin
 84     if l=r then writeln(min(n,min(tree[i,0],tree[i,1]+l)))
 85     else begin
 86       if tree[i,0]<inf then push(i,0);
 87       if tree[i,1]<inf then push(i,1);
 88       m:=(l+r) shr 1;
 89       ask(i*2,l,m);
 90       ask(i*2+1,m+1,r);
 91     end;
 92   end;
 93 
 94 begin
 95   readln(s);
 96   n:=length(s);
 97   t:=1; last:=1;
 98   for i:=n downto 1 do
 99   begin
100     add(s[i]);
101     w[last]:=i;
102   end;
103   for i:=1 to t do
104     v[fa[i]]:=true;
105 
106   build(1,1,n);
107   for i:=1 to t do
108     if not v[i] then
109     begin
110       work(1,1,n,0,w[i],w[i]+mx[fa[i]],mx[fa[i]]+1);
111       if w[i]+mx[fa[i]]<n then
112         work(1,1,n,1,w[i]+mx[fa[i]]+1,n,-w[i]+1);
113     end;
114   ask(1,1,n);
115 end.
2865

 

bzoj2946 如果用SA做,是经典的二分然后对height分组

用SAM怎么做呢?我们可以先把一个串的SAM建出来,然后用其它串匹配

得出每个节点在这个串匹配的最大长度,然后可以得到每个节点在所有串匹配的最大值的最小值

然后取所有节点这个值的最大值即可(很拗口,但是很明显)

关于后缀自动机的总结关于后缀自动机的总结
  1 var go:array[0..20010,'a'..'z'] of longint;
  2     sum,mx,fa,f,a,sa:array[0..20010] of longint;
  3     ans,m,last,t,i,n,l,j,p,len:longint;
  4     s:ansistring;
  5 
  6 function min(a,b:longint):longint;
  7   begin
  8     if a>b then exit(b) else exit(a);
  9   end;
 10 
 11 function max(a,b:longint):longint;
 12   begin
 13     if a>b then exit(a) else exit(b);
 14   end;
 15 
 16 procedure add(c:char);
 17   var p,q,np,nq:longint;
 18   begin
 19     p:=last;
 20     inc(t); last:=t; np:=t;
 21     mx[np]:=mx[p]+1;
 22     while (p<>0) and (go[p,c]=0) do
 23     begin
 24       go[p,c]:=np;
 25       p:=fa[p];
 26     end;
 27     if p=0 then fa[np]:=1
 28     else begin
 29       q:=go[p,c];
 30       if mx[q]=mx[p]+1 then fa[np]:=q
 31       else begin
 32         inc(t); nq:=t;
 33         mx[nq]:=mx[p]+1;
 34         go[nq]:=go[q];
 35         fa[nq]:=fa[q];
 36         fa[q]:=nq; fa[np]:=nq;
 37         while go[p,c]=q do
 38         begin
 39           go[p,c]:=nq;
 40           p:=fa[p];
 41         end;
 42       end;
 43     end;
 44   end;
 45 
 46 procedure pre;
 47   var i:longint;
 48   begin
 49     for i:=1 to t do
 50     begin
 51       inc(sum[mx[i]]);
 52       f[i]:=mx[i];
 53     end;
 54     for i:=1 to n do
 55       inc(sum[i],sum[i-1]);
 56     for i:=t downto 1 do
 57     begin
 58       sa[sum[mx[i]]]:=i;
 59       dec(sum[mx[i]]);
 60     end;
 61   end;
 62 
 63 begin
 64   readln(m);
 65   readln(s);
 66   n:=length(s);
 67   last:=1; t:=1;
 68   for i:=1 to n do
 69     add(s[i]);
 70   pre;
 71   for i:=2 to m do
 72   begin
 73     readln(s);
 74     l:=length(s);
 75     p:=1; len:=0;
 76     fillchar(a,sizeof(a),0);
 77     for j:=1 to l do
 78     begin
 79       while (p<>0) and (go[p,s[j]]=0) do p:=fa[p];
 80       if p=0 then
 81       begin
 82         p:=1;
 83         len:=0;
 84       end
 85       else begin
 86         len:=min(len,mx[p])+1;
 87         p:=go[p,s[j]];
 88       end;
 89       a[p]:=max(a[p],len);
 90     end;
 91     for j:=t downto 1 do
 92     begin
 93       p:=sa[j];
 94       a[fa[p]]:=max(a[fa[p]],a[p]);
 95     end;
 96     for j:=1 to t do
 97       f[j]:=min(f[j],a[j]);
 98   end;
 99   for i:=1 to t do
100     ans:=max(ans,f[i]);
101   writeln(ans);
102 end.
View Code