Problem Description
Bob has a dictionary with N words in it.
Now there is a list of words, each of which has lost a contiguous run of letters from its middle. The missing middle part never includes the first or the last character of the word.
We only know the prefix and the suffix of each word; the number of missing characters is unknown and may be 0. The prefix and the suffix of a word cannot overlap.
For each word in the list, Bob wants to determine which dictionary word it could be, judging by its prefix and suffix.
There may be many candidates. You only have to figure out how many dictionary words could be the answer.
Input
The first line of the input gives the number of test cases T; T test cases follow.
Each test case begins with two integers N and Q: the number of words in the dictionary and the number of words in the list.
The next N lines each contain a string Wi, the i-th word in the dictionary (0<|Wi|≤100000).
The next Q lines each contain two strings Pi and Si, the prefix and the suffix of the i-th word in the list (0<|Pi|,|Si|≤100000, 0<|Pi|+|Si|≤100000).
All of the above characters are lowercase letters.
The dictionary does not contain the same words.
Limits
T ≤ 5
0 < N, Q ≤ 100000
∑ (|Si| + |Pi|) ≤ 500000
∑ |Wi| ≤ 500000
Output
For each test case, output Q lines with one integer per line: the answer for each word in the list, in input order.
Sample Input
1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f
Sample Output
2
1
1
0
题意:
给出n个字符串,q个查询,每个查询包含A、B两个字符串,问在给定的n个字符串中,有多少个字符串满足前缀是A,后缀是B且前缀后缀没有重叠部分
分析:
对查询离线处理,给定的字符串保存下来,而对查询的前缀后缀建立字典树,建树过程如下,假设有ac ef这种查询情况:
先将ef翻转过来使得查询变为ac fe,之后再加一个特殊字符将前缀与后缀连接起来变为ac#fe,对ac#fe建立字典树,并在这个字符串的结尾节点处记录一个值,这个值为当前查询的下标k(即第几个查询)。如果有多个查询的前缀和后缀都相同,结尾节点只记录第一个出现的下标,其余相同的查询直接取它的答案就好了。
然后是用给定的字符串在字典树上匹配,也就是统计每个字符串对各个查询的贡献。假设有ac ef的查询,有给定的字符串acdef:在字典树上匹配的时候先走0->a,发现没有a#,继续;然后走0->a->c,发现有ac#,那么就开始将acdef的字符串反过来匹配,路径变成0->a->c->#->f->e。因为f没有值,到达e的时候发现e这处节点有值k,那么就是ans[k]++。反向匹配只会用到当前前缀之后的字符,所以前缀和后缀不会重叠。
代码:
#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
#include<algorithm>
typedef long long ll;

/// Capacity shared by every global table below. The trie needs at most one
/// node per query character (sum <= 500000), one separator node per query
/// (<= 100000) and the root, which fits in 600010 slots.
const int maxn = 600000 + 10;
/// Large sentinel value; not referenced by the code in this file section.
const int INF = 1000000000 + 10;
/// Common prime modulus; not referenced by the code in this file section.
const int mod = 1000000000 + 7;

using namespace std;

/// val[u]: 1-based index of the first query whose encoded string ends at trie
/// node u, or 0 when no query ends there.
int val[maxn];
/// Number of trie nodes allocated so far (node 0 is the root).
int sz;
/// ch[u][c]: child of node u along symbol c. Symbols 0..25 are 'a'..'z',
/// symbol 26 is the separator between a prefix and its reversed suffix.
/// 0 means "no child".
int ch[maxn][27];
/// All dictionary words of the current test case, stored back to back.
char str[maxn];
/// Scratch buffer holding one query as prefix + separator + suffix.
char s[maxn];
/// Number of dictionary words in the current test case.
int n;
/// Number of test cases still to be processed.
int T;
/// Number of queries in the current test case.
int q;
/// len[i]: length of the i-th dictionary word inside str.
int len[maxn];
/// ans[k]: number of dictionary words matching query k (1-based).
int ans[maxn];
/// nxt[k]: index of the query whose counter holds the answer for query k;
/// equals k unless an identical query appeared earlier.
int nxt[maxn];
int __insert(int st, int l, int r, int v, int flag)///根节点,左右区间,表示前后缀而且只需要在后缀记录数目,正逆序标记
{
int u = st, t = abs(r - l) + 1;
for(int i = l; t--; i += flag)///循环遍历整个前缀或者后缀
{
int c = s[i] - 'a';
if(!ch[u][c])
{
memset(ch[sz], 0, sizeof ch[sz]);
val[sz] = 0;
ch[u][c] = sz++;
}
u = ch[u][c];
}
if(v)///表示当前插入的这个是后缀的情况
{
if(val[u] == 0) val[u] = v;
else nxt[v] = val[u];///如果有多个的后缀是以同一个字母结尾的,都取同一个就好了
}
return u;
}
void query(int L, int R)///查询一个单词
{
int u = 0;
for(int i = L; i <= R; i++)
{
int c = str[i] - 'a';
if(!ch[u][c]) return ;///压根就没有这个前缀的单词
u = ch[u][c];
if(!ch[u][26]) continue;///找到这个前缀了
int st = ch[u][26];
for(int j = R; j > i; j--)////反过来找后缀
{
int k = str[j] - 'a';
st = ch[st][k];
if(!st) break;
if(val[st]) ans[val[st]]++;
}
}
}
/// Reads T test cases from standard input. For each test case it stores the
/// dictionary words, inserts every query into the trie as
/// prefix + separator + reversed suffix, matches every word against the trie
/// and prints one count per query.
///
/// Every read is checked: if the input ends early or cannot be parsed, the
/// program stops instead of continuing with stale values from a previous read.
///
/// @return 0 in all cases
int main()
{
    if (scanf("%d", &T) != 1) return 0;
    while (T--)
    {
        // Start each test case with a trie that contains only the root.
        sz = 1;
        memset(ch[0], 0, sizeof ch[0]);
        val[0] = 0;

        if (scanf("%d %d", &n, &q) != 2) return 0;

        // Each query initially reports its own counter.
        for (int i = 1; i <= q; i++) nxt[i] = i;

        // Store the dictionary words back to back in `str`. The terminator
        // written after one word is overwritten by the next word.
        int num = 0;
        for (int i = 0; i < n; i++)
        {
            if (scanf("%s", str + num) != 1) return 0;
            len[i] = static_cast<int>(strlen(str + num));
            num += len[i];
        }

        for (int i = 1; i <= q; i++)
        {
            ans[i] = 0;
            if (scanf("%s", s) != 1) return 0;
            int l1 = static_cast<int>(strlen(s));
            // Append the separator symbol (index 26) right after the prefix.
            s[l1++] = 'a' + 26;
            if (scanf("%s", s + l1) != 1) return 0;
            int l2 = static_cast<int>(strlen(s + l1));
            // Prefix and separator go in forwards and record nothing.
            int node = __insert(0, 0, l1 - 1, 0, 1);
            // The suffix goes in backwards and records the query index.
            __insert(node, l2 + l1 - 1, l1, i, -1);
        }

        num = 0;
        for (int i = 0; i < n; i++)
        {
            query(num, num + len[i] - 1);
            num += len[i];
        }

        // A repeated query prints the counter of its first occurrence.
        for (int i = 1; i <= q; i++)
        {
            printf("%d\n", ans[nxt[i]]);
        }
    }
    return 0;
}