AC自动机专题

时间:2022-05-03 20:45:54

AC自动机简介:KMP是用于解决单模式串匹配问题, AC自动机用于解决多模式串匹配问题。

精华:设这个节点上的字母为C,沿着它父亲的失败指针走,直到走到一个节点,它的儿子中也有字母为C的节点。然后把当前节点的失败指针指向那个字母也为C的儿子。如果一直走到了root都没找到,那就把失败指针指向root。

如果用KMP来解决多模式串匹配问题,则复杂度为O(n + k * m), 而AC自动机的复杂度为O(n + m + z), z为模式串出现的次数。

学习链接:

http://hi.baidu.com/nialv7/item/ce1ce015d44a6ba7feded52d

http://blog.csdn.net/niushuai666/article/details/7002823

http://www.cnblogs.com/kuangbin/p/3164106.html

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2222

思路:AC自动机的入门题,用的是bin牛的模板,统计End数组即可,统计过的需要清0.

 #include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#define FOR(i, a, b) for (int i = (a); i < (b); ++i)
#define REP(i, a, b) for (int i = (a); i <= (b); ++i)
using namespace std;

// Capacity of the node pool.
// NOTE(review): the numeric literals of this listing were lost; the values
// restored here assume the HDU 2222 limits (at most 10000 keywords of at most
// 50 lowercase letters each) -- confirm against the problem statement.
const int MAX_N = (500000 + 50);

// Aho-Corasick automaton over the lowercase letters 'a'..'z', stored in
// fixed-size arrays.
//   next[u][c] : child of node u on letter c, or -1 while the trie is being
//                built; Build() turns it into a total transition function.
//   End[u]     : number of inserted keywords that end exactly at node u.
//   fail[u]    : failure link of node u (valid after Build()).
//   L          : number of nodes allocated so far.
// Callers must pass strings made only of 'a'..'z'; other characters index
// outside the transition table.
struct Trie {
    enum { ALPHABET = 26 };

    int next[MAX_N][ALPHABET], End[MAX_N], fail[MAX_N];
    int root, L;

    // Allocates a fresh node with no children and no keyword; returns its index.
    int NewNode()
    {
        for (int c = 0; c < ALPHABET; ++c) next[L][c] = -1;
        End[L++] = 0;
        return L - 1;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds one keyword; inserting the same keyword twice counts it twice.
    void Insert(const char *str)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int id = str[i] - 'a';
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        ++End[now];
    }

    // Computes failure links breadth-first and fills every missing edge with
    // the transition of the failure node, so Query() never has to back up.
    void Build()
    {
        queue<int> que;
        fail[root] = root;
        for (int c = 0; c < ALPHABET; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < ALPHABET; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Returns how many inserted keywords occur in str.  Each keyword is
    // counted once: its counter is cleared when first seen, so the automaton
    // must be rebuilt before it can be queried again.
    int Query(const char *str)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root, res = 0;
        for (int i = 0; i < len; ++i) {
            now = next[now][str[i] - 'a'];
            // Walk the failure chain to collect every keyword ending here.
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += End[tmp];
                End[tmp] = 0;
            }
        }
        return res;
    }
} AC;

int n;
// Shared input buffer for keywords and for the text.
// NOTE(review): size restored from the HDU 2222 limit of 1000000 text
// characters (the original literal was lost) -- confirm.
char str[1000000 + 10];

// Reads the number of test cases; for each case reads n keywords and one
// text, and prints how many of the keywords occur in the text.
int main()
{
    int Cas;
    if (scanf("%d", &Cas) != 1) return 0;
    while (Cas--) {
        AC.Init();
        scanf("%d", &n);
        for (int i = 1; i <= n; ++i) {
            scanf("%s", str);
            AC.Insert(str);
        }
        AC.Build();
        scanf("%s", str);
        printf("%d\n", AC.Query(str));
    }
    return 0;
}

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2896

思路:和上题差不多,只是用End数组来记录序号而已。

 #include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#include <vector>
#define FOR(i, a, b) for (int i = (a); i < (b); ++i)
#define REP(i, a, b) for (int i = (a); i <= (b); ++i)
using namespace std;

// Capacity of the node pool.
// NOTE(review): the numeric literals of this listing were lost; the values
// restored here assume the HDU 2896 limits (at most 500 patterns of at most
// 200 characters each) -- confirm against the problem statement.
const int MAX_N = (100000 + 100);

// Aho-Corasick automaton over 7-bit ASCII, stored in fixed-size arrays.
//   next[u][c] : child of node u on character code c, or -1 while the trie is
//                being built; Build() turns it into a total transition table.
//   End[u]     : id of the pattern ending at node u, 0 if none (so ids must
//                be non-zero).
//   fail[u]    : failure link of node u (valid after Build()).
//   L          : number of nodes allocated so far.
// Callers must pass characters with codes 0..127; anything else indexes
// outside the transition table.
struct Trie {
    enum { ALPHABET = 128 };

    int next[MAX_N][ALPHABET], End[MAX_N], fail[MAX_N];
    int root, L;

    // Allocates a fresh node with no children and no pattern; returns its index.
    int NewNode()
    {
        for (int c = 0; c < ALPHABET; ++c) next[L][c] = -1;
        End[L++] = 0;
        return L - 1;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds a pattern and tags its final node with the given non-zero id.
    void Insert(const char *str, int index)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int id = str[i];
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = index;
    }

    // Computes failure links breadth-first and fills every missing edge with
    // the transition of the failure node.
    void Build()
    {
        queue<int> que;
        fail[root] = root;
        for (int c = 0; c < ALPHABET; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < ALPHABET; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Appends to ans the id of every pattern that occurs in str.  An id that
    // is already present in ans is not added again, so a pattern occurring at
    // several positions is reported once.
    void Query(const char *str, vector<int> &ans)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            now = next[now][static_cast<int>(str[i])];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                const int id = End[tmp];
                if (id && std::find(ans.begin(), ans.end(), id) == ans.end())
                    ans.push_back(id);
            }
        }
    }
} AC;

int N, M, res;
char str[ + ];
vector<int > ans[ + ]; int main()
{
AC.Init();
scanf("%d", &N);
REP(i, , N) {
scanf("%s", str);
AC.Insert(str, i);
}
AC.Build();
scanf("%d", &M);
FOR(i, , M) {
scanf("%s", str);
AC.Query(str, ans[i]);
}
res = ;
FOR(i, , M) {
if ((int)ans[i].size()) {
printf("web %d:", i + );
sort(ans[i].begin(), ans[i].end());
FOR(j, , (int)ans[i].size()) printf(" %d", ans[i][j]);
puts("");
++res;
}
}
printf("total: %d\n", res);
return ;
}

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=3065

思路:用一个数组来记录模式串在主串中出现的次数。

 #include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#define FOR(i, a, b) for (int i = (a); i < (b); ++i)
#define REP(i, a, b) for (int i = (a); i <= (b); ++i)
using namespace std;

// NOTE(review): the numeric literals of this listing were lost; the sizes
// restored below assume the HDU 3065 limits (at most 1000 patterns of at most
// 50 characters, text of at most 2000000 characters) -- confirm against the
// problem statement.
const int MAX_N = (50000 + 500);   // node pool capacity

int N, num[1000 + 10];             // pattern count; occurrences per pattern
char ss[1000 + 10][50 + 10];       // the patterns, kept for printing
char str[2000000 + 10];            // the text to scan

// Aho-Corasick automaton over 7-bit ASCII, stored in fixed-size arrays.
//   next[u][c] : child of node u on character code c, or -1 while the trie is
//                being built; Build() turns it into a total transition table.
//   End[u]     : index of the pattern ending at node u, -1 if none.
//   fail[u]    : failure link of node u (valid after Build()).
//   L          : number of nodes allocated so far.
// Callers must pass characters with codes 0..127; anything else indexes
// outside the transition table.
struct Trie {
    enum { ALPHABET = 128 };

    int next[MAX_N][ALPHABET], End[MAX_N], fail[MAX_N];
    int root, L;

    // Allocates a fresh node with no children and no pattern; returns its index.
    int NewNode()
    {
        for (int c = 0; c < ALPHABET; ++c) next[L][c] = -1;
        End[L++] = -1;
        return L - 1;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Adds a pattern and tags its final node with the given index.
    void Insert(const char *str, int index)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int id = str[i];
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = index;
    }

    // Computes failure links breadth-first and fills every missing edge with
    // the transition of the failure node.
    void Build()
    {
        queue<int> que;
        fail[root] = root;
        for (int c = 0; c < ALPHABET; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            for (int c = 0; c < ALPHABET; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Counts every (possibly overlapping) occurrence of each pattern in str
    // into the global num[], then prints "<pattern>: <count>" for each pattern
    // that occurred, in insertion order.
    void Query(const char *str)
    {
        memset(num, 0, sizeof(num));
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            now = next[now][static_cast<int>(str[i])];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (End[tmp] != -1) ++num[End[tmp]];
            }
        }
        for (int i = 0; i < N; ++i) {
            if (num[i]) printf("%s: %d\n", ss[i], num[i]);
        }
    }
} AC;

// Processes test cases until end of input: N, then N patterns, then one text.
int main()
{
    // N is read exactly once per case.  The listing used to read it a second
    // time inside the loop, which only worked because that read failed on the
    // first pattern and left N untouched.
    while (scanf("%d", &N) == 1) {
        AC.Init();
        for (int i = 0; i < N; ++i) {
            scanf("%s", ss[i]);
            AC.Insert(ss[i], i);
        }
        AC.Build();
        scanf("%s", str);
        AC.Query(str);
    }
    return 0;
}

题目链接:http://poj.org/problem?id=2778

思路:需要用到的知识:有向图中点A到点B走K步的路径数等于有向图原始矩阵的K次幂。然后对于已经建好的Trie图,我们就可以建图了,如果某个节点A不是终止节点并且这个节点的next节点B也不是终止节点,那么就连边(表示从A点走1步到节点B的方法有1种)。建好图之后就是矩阵的快速幂了,然后再统计节点0(根节点)到其余节点走N步的方法数的总和。

 #include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#define REP(i, a, b) for (int i = (a); i < (b); ++i)
#define FOR(i, a, b) for (int i = (a); i <= (b); ++i)
using namespace std;

// NOTE(review): the numeric literals of this listing were lost; the values
// restored below assume the POJ 2778 limits (at most 10 patterns of at most
// 10 letters, answers reported modulo 100000) -- confirm against the problem
// statement.
const int MAX_N = (100 + 10);   // node pool capacity and matrix dimension
const int MOD = (100000);
int M, N;                       // number of patterns; sequence length
char str[20];                   // buffer for one pattern

// Square matrix of side n with entries kept reduced modulo MOD.
struct Matrix {
    long long mat[MAX_N][MAX_N];
    int n;

    // Leaves the contents uninitialised.
    Matrix() {}

    // Builds an n-by-n zero matrix.
    Matrix(int _n)
    {
        n = _n;
        for (int i = 0; i < n; ++i)
            for (int j = 0; j < n; ++j) mat[i][j] = 0;
    }

    // Matrix product modulo MOD; both operands must have the same side.
    Matrix operator *(const Matrix &b) const
    {
        Matrix c = Matrix(n);
        for (int i = 0; i < n; ++i) {
            for (int k = 0; k < n; ++k) {
                if (mat[i][k] == 0) continue;   // contributes nothing
                for (int j = 0; j < n; ++j) {
                    c.mat[i][j] = (c.mat[i][j] + mat[i][k] * b.mat[k][j]) % MOD;
                }
            }
        }
        return c;
    }
};

// Returns mat raised to the n-th power (n >= 0) by repeated squaring.
Matrix Pow(Matrix mat, int n)
{
    Matrix ONE = Matrix(mat.n);
    for (int i = 0; i < mat.n; ++i) ONE.mat[i][i] = 1;
    Matrix tmp = mat;
    while (n) {
        if (n & 1) ONE = ONE * tmp;
        n >>= 1;
        if (n) tmp = tmp * tmp;   // no squaring needed after the last bit
    }
    return ONE;
}

// Aho-Corasick automaton over the letters A, C, G, T.
//   next[u][c] : child of node u on letter code c, or -1 while the trie is
//                being built; Build() turns it into a total transition table.
//   End[u]     : 1 if reaching node u means a pattern has just been completed
//                (directly, or through its failure chain after Build()).
//   fail[u]    : failure link of node u (valid after Build()).
//   L          : number of nodes allocated so far.
struct Trie {
    enum { ALPHABET = 4 };

    int next[MAX_N][ALPHABET], End[MAX_N], fail[MAX_N];
    int L, root;

    // Allocates a fresh node with no children; returns its index.
    int NewNode()
    {
        for (int c = 0; c < ALPHABET; ++c) next[L][c] = -1;
        End[L++] = 0;
        return L - 1;
    }

    // Resets the automaton to a single root node.
    void Init()
    {
        L = 0;
        root = NewNode();
    }

    // Maps a letter to its column 0..3; returns -1 for any other character
    // (the listing previously fell off the end without returning a value).
    int getID(char ch)
    {
        switch (ch) {
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        case 'T': return 3;
        default:  return -1;
        }
    }

    // Adds a forbidden pattern.
    void Insert(const char *str)
    {
        const int len = static_cast<int>(strlen(str));
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int id = getID(str[i]);
            // A pattern with a letter outside ACGT can never occur in a
            // sequence over ACGT, so it forbids nothing and is dropped.
            if (id < 0) return;
            if (next[now][id] == -1) next[now][id] = NewNode();
            now = next[now][id];
        }
        End[now] = 1;
    }

    // Computes failure links breadth-first, fills missing edges, and marks a
    // node as terminal when its failure node is terminal.
    void Build()
    {
        queue<int> que;
        fail[root] = root;
        for (int c = 0; c < ALPHABET; ++c) {
            if (next[root][c] == -1) {
                next[root][c] = root;
            } else {
                fail[next[root][c]] = root;
                que.push(next[root][c]);
            }
        }
        while (!que.empty()) {
            const int now = que.front();
            que.pop();
            // fail[now] is shallower than now, so its flag is already final.
            if (End[fail[now]]) End[now] = 1;
            for (int c = 0; c < ALPHABET; ++c) {
                if (next[now][c] == -1) {
                    next[now][c] = next[fail[now]][c];
                } else {
                    fail[next[now][c]] = next[fail[now]][c];
                    que.push(next[now][c]);
                }
            }
        }
    }

    // Builds the L-by-L step matrix: entry (i, j) is the number of letters
    // that move the automaton from node i to a non-terminal node j.
    Matrix getMatrix()
    {
        Matrix res = Matrix(L);
        for (int i = 0; i < L; ++i) {
            for (int c = 0; c < ALPHABET; ++c) {
                const int to = next[i][c];
                if (!End[to]) ++res.mat[i][to];
            }
        }
        return res;
    }
} AC;

// For each test case reads M patterns and the length N, and prints, modulo
// MOD, the sum over all nodes of the number of N-step walks from the root in
// the step matrix.
int main()
{
    while (scanf("%d %d", &M, &N) == 2) {
        AC.Init();
        for (int i = 1; i <= M; ++i) {
            scanf("%s", str);
            AC.Insert(str);
        }
        AC.Build();
        Matrix tmp = AC.getMatrix();
        tmp = Pow(tmp, N);
        long long ans = 0;
        for (int i = 0; i < tmp.n; ++i) {
            ans = (ans + tmp.mat[0][i]) % MOD;
        }
        printf("%lld\n", ans);
    }
    return 0;
}