题目链接:https://vjudge.net/problem/HDU-3247
Resource Archiver
Time Limit: 20000/10000 MS (Java/Others) Memory Limit: 100000/100000 K (Java/Others)
Total Submission(s): 3228 Accepted Submission(s): 1052
Wait a minute… you realized that it isn’t as easy as you thought. Think about the virus killers. They’ll find your software suspicious, if your software contains one of the m predefined virus codes. You absolutely don’t want this to happen.
Technically, resource files and virus codes are merely 01 strings. You’ve already convinced yourself that none of the resource strings contain a virus code, but if you make the archive arbitrarily, virus codes can still be found somewhere.
Here comes your task (formally): design a 01 string that contains all your resources (their occurrences can overlap), but none of the virus codes. To make your software smaller in size, the string should be as short as possible.
1110
0111
101
1001
0 0
题意:
给出n个(n<=10)字符串,和m(m<=1000)个单词。求包含所有字符串,并且不包含任何一个单词的长串的最短长度。
题解:
1.看到n的大小为10,并且要求包含所有字符串,就很容易想到用状压DP。
1.1 设dp[status][u]为:当前状态为status(包含了哪些字符串),并且以字符串u结尾的最短长度。设cost[u][v]为:在字符串u后面接上字符串v所需要的最短长度。由于字符串之间可以有重叠,所以cost[u][v]可能小于len[v];由于长串不能包含单词,所以在链接u、v时,之间可能还需要加一些字符以便跳过单词,所以cost[u][v]可能大于len[v]。
1.2 假如求出了cost数组,那么剩下的就是典型TSP问题了。
2. 如何求cost数组呢?
2.1 把n个字符串和m个单词都插入到AC自动机中,并且需要对字符串和单词作相应的标记。
2.2 对于每个字符串,在自动机上跑最短路,求出当前字符串与其他字符串的最短距离,在跑的时候需要跳过单词。原理:AC自动机各个状态之间的关系构成了一张图。
代码如下:
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
#include <cmath>
#include <queue>
#include <stack>
#include <map>
#include <string>
#include <set>
using namespace std;
typedef long long LL;

// General-purpose constants. EPS, LNF and MOD are not referenced by the code
// in this file. NOTE(review): the EPS exponent was missing in the source;
// 1e-6 is an assumed value.
const double EPS = 1e-6;
const int INF = 2000000000;            // "unreachable" marker for dis/cost/dp
const LL LNF = 2000000000000000000LL;
const int MOD = 100000;

// Capacity of every per-node array (Trie::next/fail/end, Index, dis) and of
// the input buffer. NOTE(review): the trailing slack term was missing in the
// source; presumably 50000 + 10000 bounds the total input length plus a small
// margin -- confirm against the problem limits.
const int MAXN = 50000 + 10000 + 10;

// Largest number of resource strings handled (n <= 10 per the write-up).
const int MAX_RES = 10;

int n, m;                        // number of resource strings / virus codes
int Index[MAXN];                 // automaton node -> resource index, -1 if none
int pos[MAXN];                   // resource index -> automaton node where it ends
int cost[MAX_RES][MAX_RES];      // cost[u][v]: BFS distance from pos[u] to pos[v]
int Len[MAXN];                   // resource index -> string length
int dp[1 << MAX_RES][MAX_RES];   // dp[mask][last]: shortest length covering mask, ending at last
// Aho-Corasick automaton over a two-symbol alphabet, plus the bitmask DP that
// combines the pairwise costs into the final answer.
//
// Nodes are numbered 0..L-1. After build(), next[][] is a complete transition
// table (no -1 entries) and end[v] is non-zero for every node whose string has
// a virus code as a suffix.
struct Trie
{
    int sz, base;                // alphabet size and the character mapped to symbol 0
    int next[MAXN][2];           // transitions; second dimension fixes sz <= 2
    int fail[MAXN], end[MAXN];   // failure links / "forbidden node" flags
    int root, L;                 // root node id and number of nodes allocated

    // Allocates a fresh node with no children and no virus mark; returns its id.
    int newnode()
    {
        for(int i = 0; i<sz; i++)
            next[L][i] = -1;
        end[L++] = false;
        return L-1;
    }

    // Resets the automaton to a single root node.
    // _sz: alphabet size (must not exceed 2); _base: character of symbol 0.
    void init(int _sz, int _base)
    {
        sz = _sz;
        base = _base;
        L = 0;
        root = newnode();
    }

    // Inserts buf and returns the node where it ends. When isVirus is true the
    // terminal node is marked forbidden.
    int insert(char buf[], bool isVirus)
    {
        int len = strlen(buf);
        int now = root;
        for(int i = 0; i<len; i++)
        {
            int c = buf[i]-base;
            if(next[now][c] == -1) next[now][c] = newnode();
            now = next[now][c];
        }
        // Only ever set the mark: inserting a resource must not clear a virus
        // mark that an earlier insert put on the same node.
        if(isVirus) end[now] = true;
        return now;
    }

    // Computes failure links in BFS order, fills in the missing transitions and
    // propagates virus marks along the failure links.
    void build()
    {
        queue<int> Q;
        fail[root] = root;
        for(int i = 0; i<sz; i++)
        {
            if(next[root][i] == -1) next[root][i] = root;
            else fail[next[root][i]] = root, Q.push(next[root][i]);
        }
        while(!Q.empty())
        {
            int now = Q.front();
            Q.pop();
            // fail[now] is shallower, so its mark is already final.
            end[now] |= end[fail[now]];
            for(int i = 0; i<sz; i++)
            {
                if(next[now][i] == -1) next[now][i] = next[fail[now]][i];
                else fail[next[now][i]] = next[fail[now]][i], Q.push(next[now][i]);
            }
        }
    }

    // Travelling-salesman style DP over the global cost[][] and Len[] tables.
    // Returns the minimum total length that visits all n resources, or INF if
    // no ordering is feasible. Uses the globals n, dp, cost, Len and INF.
    int query()
    {
        // No resources to cover: the empty string already suffices.
        if(n == 0) return 0;

        const int full = (1<<n)-1;
        for(int i = 0; i<=full; i++)
            for(int j = 0; j<n; j++)
                dp[i][j] = INF;
        for(int j = 0; j<n; j++)
            dp[1<<j][j] = Len[j];

        for(int i = 0; i<=full; i++)
            for(int j = 0; j<n; j++)
            {
                if(!(i&(1<<j)) || dp[i][j]==INF) continue;
                for(int k = 0; k<n; k++)
                    if(!(i&(1<<k)) && cost[j][k]!=INF)
                        dp[i|(1<<k)][k] = min(dp[i|(1<<k)][k], dp[i][j]+cost[j][k]);
            }

        int ret = INF;
        for(int i = 0; i<n; i++)
            ret = min(ret, dp[full][i]);
        return ret;
    }
};
Trie ac; int dis[MAXN];
void BFS(int st)
{
queue<int> Q;
while(!Q.empty()) Q.pop();
for(int i = ; i<ac.L; i++)
dis[i] = INF;
dis[st] = ;
Q.push(st);
while(!Q.empty())
{
int u = Q.front(); Q.pop();
for(int i = ; i<ac.sz; i++)
{
int v = ac.next[u][i];
if(dis[v]==INF && !ac.end[v])
{
dis[v] = dis[u] + ;
Q.push(v);
}
}
}
for(int i = ; i<n; i++)
cost[Index[st]][i] = dis[pos[i]];
} char buf[MAXN];
// Reads test cases until "0 0" or end of input. For each case: builds the
// automaton from n resource strings and m virus codes, runs one BFS per
// resource to fill cost[][], then prints the DP result.
int main()
{
    // Require both numbers to be read: scanf returns EOF (non-zero) at end of
    // input, which would otherwise keep the loop running on stale n and m.
    while(scanf("%d%d",&n,&m) == 2 && (n||m))
    {
        ac.init(2,'0');   // strings are over the alphabet {'0','1'}
        memset(Index, -1, sizeof(Index));
        for(int i = 0; i<n; i++)
        {
            scanf("%s", buf);
            Len[i] = strlen(buf);
            pos[i] = ac.insert(buf, false);
            Index[pos[i]] = i;
        }
        for(int i = 0; i<m; i++)
        {
            scanf("%s", buf);
            ac.insert(buf, true);
        }
        ac.build();

        for(int i = 0; i<n; i++)
            BFS(pos[i]);

        int ans = ac.query();
        printf("%d\n", ans);
    }
    return 0;
}