DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 11860 | Accepted: 4527 |
Description
It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Input
First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
几个月没编AC自动机,突然编一下感觉之痛苦。
主要问题一点是fail指针构造时没有即使break掉,另一点是tail标记上传时需要加入一个while循环。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
#define MAXN 100
#define MAXT 100
#define MOD 100000
typedef long long qword; struct trie_node
{
int ptr[];
int fail;
char w;
bool tl;
}trie[MAXT];
int topt=;
char str[MAXN];
void Add_word(char *str)
{
int now=;
int ind;
while (*str)
{
ind=*(str++)-'A';
if (!trie[now].ptr[ind])
{
trie[now].ptr[ind]=++topt;
trie[topt].w=ind+'A';
trie[topt].tl=false;
}
now=trie[now].ptr[ind];
}
trie[now].tl=true;
}
int q[MAXN];
void Build_Ac()
{
int head=-,tail=;
int now,temp;
int i,j;
q[]=;
trie[].fail=;
while (head<tail)
{
now=q[++head];
for (i=;i<;i++)
{
if (!trie[now].ptr[i])continue;
q[++tail]=trie[now].ptr[i];
if (now==)
{
trie[trie[now].ptr[i]].fail=now;
continue;
}
temp=trie[now].fail;
while(temp!=)
{
if (trie[temp].ptr[i])
{
trie[trie[now].ptr[i]].fail=trie[temp].ptr[i];
break;
}
temp=trie[temp].fail;
}
if (!trie[trie[now].ptr[i]].fail)
trie[trie[now].ptr[i]].fail=trie[].ptr[i];
if (!trie[trie[now].ptr[i]].fail)
trie[trie[now].ptr[i]].fail=;
}
}
for (i=;i<=topt;i++)
{
for (j=;j<;j++)
{
if (!trie[i].ptr[j])
{
temp=trie[i].fail;
while (temp!=)
{
if (trie[temp].ptr[j])
{
trie[i].ptr[j]=trie[temp].ptr[j];
break;
}
temp=trie[temp].fail;
}
if (!trie[i].ptr[j])
trie[i].ptr[j]=trie[].ptr[j];
if (!trie[i].ptr[j])
trie[i].ptr[j]=;
}
}
}
j=;
while (j--)
{
for (i=;i<=tail;i++)
{
if (trie[trie[i].fail].tl)
trie[i].tl=true;
}
}
}
struct matrix
{
int n,m;
qword a[MAXN][MAXN];
matrix()
{
memset(a,,sizeof(a));
}
void pm()
{
int i,j;
for (i=;i<=n;i++)
{
for (j=;j<=m;j++)
{
printf("% 4d ",a[i][j]);
}
printf("\n");
}
printf("\n");
}
}m1,r1,m2;
matrix operator *(matrix &m1,matrix &m2)
{
if (m1.m!=m2.n)throw ;
matrix ret;
ret.n=m1.n;
ret.m=m2.m;
int i,j,k;
for (i=;i<=ret.n;i++)
{
for (j=;j<=ret.m;j++)
{
for (k=;k<=m1.m;k++)
{
ret.a[i][j]=(ret.a[i][j]+m1.a[i][k]*m2.a[k][j])%MOD;
}
}
}
return ret;
} int main()
{
freopen("input.txt","r",stdin);
// freopen("output.txt","w",stdout);
int n,m,i,j,k,x,y,z;
scanf("%d%d\n",&m,&n);
topt=;
trie[].w='#';
for (i=;i<m;i++)
{
scanf("%s",str);
x=strlen(str);
for (j=;j<x;j++)
{
switch(str[j])
{
case 'A':str[j]='A';break;
case 'G':str[j]='B';break;
case 'C':str[j]='C';break;
case 'T':str[j]='D';break;
}
}
Add_word(str);
}
Build_Ac();
m1.m=m1.n=topt;
for (i=;i<=topt;i++)
{
for (j=;j<;j++)
{
if (trie[trie[i].ptr[j]].tl)continue;
m1.a[trie[i].ptr[j]][i]++;
}
}
// m1.pm();
m2.m=,m2.n=topt;
m2.a[][]=;
while (n)
{
if (n&)
{
m2=m1*m2;
}
m1=m1*m1;
n>>=;
}
/*
for (i=1;i<=n;i++)
{
m2=m1*m2;
// m2.pm();
}*/
qword ans=;
for (i=;i<=topt;i++)
{
ans+=m2.a[i][];
ans%=MOD;
}
printf("%d\n",ans);
return ;
}