最近正在学AC自动机,按照惯例需要刷一套kuangbin的AC自动机专题巩固
在网上看过很多模板,感觉kuangbin大神的模板最为简洁,于是就选择了用kuangbin大神的模板。
AC自动机其实就是字典树和KMP的结合,然后去思考一下KMP的原理,然后就是在字典树上实现KMP
这里最重要的思想可能就是fail的思想,就像KMP一样,匹配失败后,有一个next的数组去回溯(最长公共前缀后缀)
如何理解了KMP的话,感觉这个不会很难理解,字典树是一个非常简单的东西就不用讲了吧。
HDU - 2222,HDU - 2896,HDU - 3065,ZOJ - 3430
这是我总结出来的AC自动机解决的第一类问题,求文本串和模式串的关系(模式串出现几次之类的问题)
我把这几题放一起写了算了,因为套路一样随便改一下就好了
Keywords Search HDU - 2222
询问有多少个模式串出现在了文本串里面。
将模式串插入Trie树中,然后直接查询就好了。
#include <set>
#include <map>
#include <stack>
#include <queue>
#include <cmath>
#include <cstdio>
#include <string>
#include <vector>
#include <time.h>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <unordered_map> #define pi acos(-1.0)
#define eps 1e-9
#define fi first
#define se second
#define rtl rt<<1
#define rtr rt<<1|1
#define bug printf("******\n")
#define mem(a, b) memset(a,b,sizeof(a))
#define name2str(x) #x
#define fuck(x) cout<<#x" = "<<x<<endl
#define sf(n) scanf("%d", &n)
#define sff(a, b) scanf("%d %d", &a, &b)
#define sfff(a, b, c) scanf("%d %d %d", &a, &b, &c)
#define sffff(a, b, c, d) scanf("%d %d %d %d", &a, &b, &c, &d)
#define pf printf
#define FIN freopen("data.txt","r",stdin)
#define gcd(a, b) __gcd(a,b)
#define lowbit(x) x&-x
#define IO iOS::sync_with_stdio(false) using namespace std;
typedef long long LL;
typedef unsigned long long ULL;
const int maxn = 1e6 + ;
const int maxm = 8e6 + ;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + ; struct Aho_Corasick {
int next[][], fail[], End[];
int root, cnt; int newnode() {
for (int i = ; i < ; i++) next[cnt][i] = -;
End[cnt++] = ;
return cnt - ;
} void init() {
cnt = ;
root = newnode();
} void insert(char buf[]) {
int len = strlen(buf);
int now = root;
for (int i = ; i < len; i++) {
if (next[now][buf[i] - 'a'] == -) next[now][buf[i] - 'a'] = newnode();
now = next[now][buf[i] - 'a'];
}
End[now]++;
} void build() {
queue<int> Q;
fail[root] = root;
for (int i = ; i < ; i++)
if (next[root][i] == -) next[root][i] = root;
else {
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
while (!Q.empty()) {
int now = Q.front();
Q.pop();
for (int i = ; i < ; i++)
if (next[now][i] == -) next[now][i] = next[fail[now]][i];
else {
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
}
}
} int query(char buf[]) {
int len = strlen(buf);
int now = root;
int res = ;
for (int i = ; i < len; i++) {
now = next[now][buf[i] - 'a'];
int temp = now;
while (temp != root) {
res += End[temp];
End[temp] = ;
temp = fail[temp];
}
}
return res;
} void debug() {
for (int i = ; i < cnt; i++) {
printf("id = %3d,fail = %3d,end = %3d,chi = [", i, fail[i], End[i]);
for (int j = ; j < ; j++) printf("%2d", next[i][j]);
printf("]\n");
}
}
}ac; char buf[];
int T,n;
int main() {
sf(T);
while(T--){
sf(n);
ac.init();
for (int i = ; i < n; ++i) {
scanf("%s",buf);
ac.insert(buf);
}
scanf("%s",buf);
ac.build();
printf("%d\n",ac.query(buf));
}
return ;
}
病毒侵袭 HDU - 2896
给出n个模式串,对m个串进行匹配,输出匹配的模式串的编号,以及总共多少个串可以匹配。
字符总数有128,而且编号要排序。
#include <set>
#include <map>
#include <stack>
#include <queue>
#include <cmath>
#include <cstdio>
#include <string>
#include <vector>
#include <time.h>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <unordered_map> #define pi acos(-1.0)
#define eps 1e-9
#define fi first
#define se second
#define rtl rt<<1
#define rtr rt<<1|1
#define bug printf("******\n")
#define mem(a, b) memset(a,b,sizeof(a))
#define name2str(x) #x
#define fuck(x) cout<<#x" = "<<x<<endl
#define sf(n) scanf("%d", &n)
#define sff(a, b) scanf("%d %d", &a, &b)
#define sfff(a, b, c) scanf("%d %d %d", &a, &b, &c)
#define sffff(a, b, c, d) scanf("%d %d %d %d", &a, &b, &c, &d)
#define pf printf
#define FIN freopen("../date.txt","r",stdin)
#define gcd(a, b) __gcd(a,b)
#define lowbit(x) x&-x
#define IO iOS::sync_with_stdio(false) using namespace std;
typedef long long LL;
typedef unsigned long long ULL;
const int maxn = 1e6 + ;
const int maxm = 8e6 + ;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + ; struct Aho_Corasick {
int next[][], fail[], End[],vis[];
int root, cnt;
vector<int>ans;
int newnode() {
for (int i = ; i < ; i++) next[cnt][i] = -;
End[cnt++] = ;
return cnt - ;
} void init() {
cnt = ;
root = newnode();
} void insert(char buf[],int id) {
int len = strlen(buf);
int now = root;
for (int i = ; i < len; i++) {
if (next[now][buf[i]] == -) next[now][buf[i]] = newnode();
now = next[now][buf[i]];
}
End[now]++;
vis[now]=id;
} void build() {
queue<int> Q;
fail[root] = root;
for (int i = ; i < ; i++)
if (next[root][i] == -) next[root][i] = root;
else {
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
while (!Q.empty()) {
int now = Q.front();
Q.pop();
for (int i = ; i < ; i++)
if (next[now][i] == -) next[now][i] = next[fail[now]][i];
else {
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
}
}
} int query(char buf[]) {
int len = strlen(buf);
int now = root;
int res = ;
for (int i = ; i < len; i++) {
now = next[now][buf[i]];
int temp = now;
while (temp != root) {
res += End[temp];
if (vis[temp]) ans.push_back(vis[temp]);
// End[temp] = 0;
temp = fail[temp];
}
}
return res;
}
void pf_ans(){
sort(ans.begin(),ans.end());
for (int i= ;i<ans.size() ;i++) printf(" %d",ans[i]);
printf("\n");
ans.clear();
}
void debug() {
for (int i = ; i < cnt; i++) {
printf("id = %3d,fail = %3d,end = %3d,chi = [", i, fail[i], End[i]);
for (int j = ; j < ; j++) printf("%2d", next[i][j]);
printf("]\n");
}
}
}ac; char buf[];
int n,m;
int main() {
// FIN;
while(~sf(n)){
ac.init();
for (int i = ; i <= n; ++i) {
scanf("%s",buf);
ac.insert(buf,i);
}
ac.build();
sf(m);
int tot=;
for (int i = ; i <= m; ++i) {
scanf("%s",buf);
//fuck(i);
if (ac.query(buf)) {
tot++;
printf("web %d:",i);
ac.pf_ans();
}
}
printf("total: %d\n",tot);
}
return ;
}
病毒侵袭持续中 HDU - 3065
这题题意和上一题一样,多了一个输出出现次数。
#include <set>
#include <map>
#include <stack>
#include <queue>
#include <cmath>
#include <cstdio>
#include <string>
#include <vector>
#include <time.h>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <unordered_map> #define pi acos(-1.0)
#define eps 1e-9
#define fi first
#define se second
#define rtl rt<<1
#define rtr rt<<1|1
#define bug printf("******\n")
#define mem(a, b) memset(a,b,sizeof(a))
#define name2str(x) #x
#define fuck(x) cout<<#x" = "<<x<<endl
#define sf(n) scanf("%d", &n)
#define sff(a, b) scanf("%d %d", &a, &b)
#define sfff(a, b, c) scanf("%d %d %d", &a, &b, &c)
#define sffff(a, b, c, d) scanf("%d %d %d %d", &a, &b, &c, &d)
#define pf printf
#define FIN freopen("../date.txt","r",stdin)
#define gcd(a, b) __gcd(a,b)
#define lowbit(x) x&-x
#define IO iOS::sync_with_stdio(false) using namespace std;
typedef long long LL;
typedef unsigned long long ULL;
const int maxn = 1e6 + ;
const int maxm = 8e6 + ;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + ; char str[][];
char buf[];
int n, m;
struct Aho_Corasick {
int next[*][], fail[*], End[*], num[];
int root, cnt; int newnode() {
for (int i = ; i < ; i++) next[cnt][i] = -;
End[cnt++] = ;
return cnt - ;
} void init() {
cnt = ;
root = newnode();
} void insert(char buf[], int id) {
int len = strlen(buf);
int now = root;
for (int i = ; i < len; i++) {
if (next[now][buf[i]] == -) next[now][buf[i]] = newnode();
now = next[now][buf[i]];
}
End[now] = id;
} void build() {
queue<int> Q;
fail[root] = root;
for (int i = ; i < ; i++)
if (next[root][i] == -) next[root][i] = root;
else {
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
while (!Q.empty()) {
int now = Q.front();
Q.pop();
for (int i = ; i < ; i++)
if (next[now][i] == -) next[now][i] = next[fail[now]][i];
else {
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
}
}
} void query(char buf[]) {
for(int i = ;i <= n;i++) num[i] = ;
int len = strlen(buf);
int now = root;
for (int i = ; i < len; i++) {
now = next[now][buf[i]];
int temp = now;
while (temp != root) {
if (End[temp]) num[End[temp]]++;
// End[temp] = 0;
temp = fail[temp];
}
}
for (int i= ;i<=n ;i++)
if (num[i]) printf("%s: %d\n",str[i],num[i]);
} void debug() {
for (int i = ; i < cnt; i++) {
printf("id = %3d,fail = %3d,end = %3d,chi = [", i, fail[i], End[i]);
for (int j = ; j < ; j++) printf("%2d", next[i][j]);
printf("]\n");
}
}
} ac; int main() {
//FIN;
while (~sf(n)) {
ac.init();
for (int i = ; i <= n; ++i) {
scanf("%s", str[i]);
ac.insert(str[i], i);
}
ac.build();
scanf("%s", buf);
ac.query(buf);
}
return ;
}
Detect the Virus ZOJ - 3430
这题本质上还是和上面一个套路只是恶心了很多。
题目是给先你n组经过编码后的病毒串:
编码就是将原串转成二进制后取每6位进行转化成10进制后与上面表格对应的字符串,
末尾不足6位补0,且原串个数为3k+1则在编码后的串里增加'==',
若个数为3k+2则增加'=',给定的模式串也是编码后的串,
所以需要进行反编码然后就是赤裸裸的AC自动机模版了。
注意转化后的字符范围为256(包括转义字符),strlen也无法使用,需要用unsigned char
#include <cstdio>
#include <cstring>
#include <queue> #define sf(n) scanf("%d", &n)
#define sff(a, b) scanf("%d %d", &a, &b) using namespace std;
typedef long long LL;
typedef unsigned long long ULL;
const int maxn = 1e6 + ;
const int maxm = 8e6 + ;
const int INF = 0x3f3f3f3f;
const int mod = 1e9 + ; unsigned char buf[];
char str[];
unsigned char s[];
int n, m, tot; struct Aho_Corasick {
int next[ * ][], fail[ * ], End[ * ], vis[];
int root, cnt; int newnode() {
for (int i = ; i < ; i++) next[cnt][i] = -;
End[cnt++] = ;
return cnt - ;
} void init() {
cnt = ;
root = newnode();
} void insert(unsigned char buf[], int len, int id) {
int now = root;
for (int i = ; i < len; i++) {
if (next[now][buf[i]] == -) next[now][buf[i]] = newnode();
now = next[now][buf[i]];
}
End[now] = id;
} void build() {
queue<int> Q;
fail[root] = root;
for (int i = ; i < ; i++)
if (next[root][i] == -) next[root][i] = root;
else {
fail[next[root][i]] = root;
Q.push(next[root][i]);
}
while (!Q.empty()) {
int now = Q.front();
Q.pop();
for (int i = ; i < ; i++)
if (next[now][i] == -) next[now][i] = next[fail[now]][i];
else {
fail[next[now][i]] = next[fail[now]][i];
Q.push(next[now][i]);
}
}
} int query(unsigned char buf[], int len, int n) {
memset(vis, false, sizeof(vis));
int now = root;
for (int i = ; i < len; i++) {
now = next[now][buf[i]];
int temp = now;
while (temp != root) {
if (End[temp] != -)
vis[End[temp]] = true;
temp = fail[temp];
}
}
int res = ;
for (int i = ; i <= n; i++) if (vis[i]) res++;
return res;
} void debug() {
for (int i = ; i < cnt; i++) {
printf("id = %3d,fail = %3d,end = %3d,chi = [", i, fail[i], End[i]);
for (int j = ; j < ; j++) printf("%2d", next[i][j]);
printf("]\n");
}
}
} ac; unsigned char Get(char ch) {
if (ch >= 'A' && ch <= 'Z')return ch - 'A';
if (ch >= 'a' && ch <= 'z')return ch - 'a' + ;
if (ch >= '' && ch <= '')return ch - '' + ;
if (ch == '+')return ;
else return ;
} void change(unsigned char str[], int len) {
int t = ;
for (int i = ; i < len; i += ) {
buf[t++] = ((str[i] << ) | (str[i + ] >> ));
if (i + < len)
buf[t++] = ((str[i + ] << ) | (str[i + ] >> ));
if (i + < len)
buf[t++] = ((str[i + ] << ) | str[i + ]);
}
tot = t;
} int main() {
while (~sf(n)) {
ac.init();
for (int i = ; i <= n; ++i) {
scanf("%s", str);
int len = strlen(str);
while (str[len - ] == '=')len--;
for (int j = ; j < len; j++) s[j] = Get(str[j]);
change(s, len);
ac.insert(buf, tot, i);
}
ac.build();
sf(m);
for (int i = ; i <= m; i++) {
scanf("%s", str);
int len = strlen(str);
while (str[len - ] == '=') len--;
for (int j = ; j < len; j++) s[j] = Get(str[j]);
change(s, len);
printf("%d\n", ac.query(buf, tot, n));
}
printf("\n");
}
return ;
}