AC automation 模板

时间:2023-03-09 09:26:49
AC automation 模板
 /*
1.对n个字符串构造tire树 insertWord(node *root, char *word);
2.bfs构造fail指针 makeFail(node *root);
3.基于以上两点的查询 query(node *root, char *str);
*/
#include <stdio.h>
#include <string.h>
#include <queue>
using namespace std;
const int N1 = + ;
const int N2 = + ;
char key[N1];
char desc[N2];
struct node
{
node *next[];
int cnt;
node *fail;
node(){for(int i=; i<; ++i) next[i] = NULL; fail = NULL; cnt = ;}
};
void insertWord(node *root)//构造trie树
{
node *cur = root;
int n = strlen(key);
for(int i=; i<n; ++i)
{
int index = key[i] - 'a';
if(cur->next[index] != NULL)
cur = cur->next[index];
else
{
cur->next[index] = new node();
cur = cur->next[index];
}
}
cur->cnt++;
}
void makeFail(node *root)//构造fail指针
{
queue<node*> q;
q.push(root);
node *cur;
while(!q.empty())
{
cur = q.front();
q.pop();
for(int i=; i<; ++i)
{
if(cur->next[i] != NULL)
{
if(cur == root)//与root相连的结点,即第二层的结点的fail指针都是root
cur->next[i]->fail = root;
else
{
node *tmp = cur;
while(tmp->fail != NULL)// why while?
{
if(tmp->fail->next[i] != NULL)
{
cur->next[i]->fail = tmp->fail->next[i];
break;
}
tmp = tmp->fail;
}
if(tmp->fail == NULL)
cur->next[i]->fail = root;
}
q.push(cur->next[i]);
}
}
}
}
int query(node *root)//查询
{
node *cur = root;
node *tmp = NULL;
int i=,cnt=;
while(desc[i])
{
int index = desc[i] - 'a';
while(cur!=root && cur->next[index] == NULL)
cur = cur->fail;
if(cur->next[index] != NULL)
{
cur = cur->next[index];
tmp = cur;
while(tmp != root && tmp->cnt!=)
{
cnt += tmp->cnt;
tmp->cnt = ;
tmp = tmp->fail;
}
}
i++;
}
return cnt;
}
int main()
{
int t,n;
scanf("%d",&t);
while(t--)
{
node *root = new node();
scanf("%d",&n);
for(int i=; i<n; ++i)
{
scanf("%s",key);
insertWord(root);
}
makeFail(root);
scanf("%s",desc);
int ans = query(root);
printf("%d\n",ans);
}
return ;
}
 //多串匹配,n个模式字符串构成AC自动机,然后目标串去匹配,看目标串中有多少个模式串
#include <stdio.h>
#include <string.h>
#include <queue>
using namespace std;
/*
根结点的fail指针为NULL,根结点的直接子结点的fail指针为root,很明显,当一个字符都不匹配时,从根结点再开始匹配
每个结点的fail指针都是由它父结点的fail指针决定的。
*/
const int N = + ;
struct node
{
node *next[],*fail;
int cnt;
node(){for(int i=; i<; ++i) next[i] = NULL; fail = NULL; cnt = ;}
};
void insertWord(node *root, char *word)
{
node *cur = root;
int i = ;
while(word[i])
{
int index = word[i] - 'a';
if(cur->next[index] == NULL)
cur->next[index] = new node();
cur = cur->next[index];
++i;
}
cur->cnt ++;
}
char str[N];
void makeFail(node *root)
{
node *cur,*tmp;
queue<node*> q;
q.push(root);
while(!q.empty())
{
cur = q.front();
q.pop();
for(int i=; i<; ++i)
{
if(cur->next[i] != NULL)
{
q.push(cur->next[i]);
if(cur == root)//如果当前结点是root,那么它的直接孩子结点的fail指针指向root
cur->next[i]->fail = root;
else
{
tmp = cur;
while(tmp->fail != NULL)//because root->fail == NULL,如果到了这个地步,说明当前字符串没有后缀
{
if(tmp->fail->next[i] != NULL)
{
cur->next[i]->fail = tmp->fail->next[i];
break;
}
tmp = tmp->fail;
}
if(tmp->fail == NULL)
cur->next[i]->fail = root;
}
}
}
}
} // how to query???
int query(node *root, char *str)
{
node *cur = root;
node *tmp = NULL;
int i = , cnt = ;
while(str[i])
{
int index = str[i] - 'a';
while(cur!=root && cur->next[index]==NULL)//如果失配,那么直接跳到fail指针处去匹配
cur = cur->fail;
if(cur->next[index] != NULL)
{
cur = cur->next[index];//如果当前字符匹配成功,则跳到那个字符,
tmp = cur;
//这就是为什么Ac自动机效率高的缘故,根据fail指针,跳到当前字符串的最长后缀去
//如果tmp->cnt != 0 说明存在该最长后缀形成的字符串
while(tmp->cnt!=)
{
cnt += tmp->cnt;
tmp->cnt = ;
tmp = tmp->fail;
}
}
++i;
}
return cnt;
}
int main()
{
int t,n;
scanf("%d",&t);
char word[];
while(t--)
{
node *root = new node();
scanf("%d",&n);
for(int i=; i<n; ++i)
{
scanf("%s",word);
insertWord(root, word);
}
scanf("%s",str);
makeFail(root);
int ans = query(root, str);
printf("%d\n",ans);
}
return ;

给定n个模式串,长度均不超过m,和一个目标串(长度为L),问目标串中包含多少个模式串(可重叠
的)。
暴力算法是一个个模式串去与目标串匹配,时间复杂度是O(n*m*L)
有更好的算法是AC自动机,时间复杂度是O(n)(这个怎么算来着??)

AC自动机分为两步,1.构建trie树。2.构建fail指针。正是这个fail指针将时间复杂度给大大缩小了

fail指针是匹配失败时,该跳到那个结点去重新匹配
fail指针是指向当前字符串的最长后缀,比如she的fail指针应该指向he或e或root(即指向存在的最长后

缀)
所以当前结点的fail指针由父结点的fail指针所决定

学习资料:http://www.cppblog.com/menjitianya/archive/2014/07/10/207604.html