本文章为代替git保存代码
Kmp算主要分为两步:
(1)取得next数组
(2)查找子串
代码如下:
#include <iostream>
#include <string>
#include <vector>
#include <cassert>
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
bool get_next(const std::string& str, std::vector<int>& next) {
if (str.empty()) {
return false;
}
next.resize(str.size());
// str[0 ~ k-1] = str[p-k ~ p-1]
// 0 k-1 k p-k p-1 p
// a b c d e f g h a b c d e n o p q t s t u v w x y z
int k = -1;
int p = 0;
next[0] = -1;
while (p + 1 < str.size()) {
if (k == -1 || str[k] == str[p]) {
k++;
p++;
next[p] = k;
} else {
k = next[k];
}
}
return true;
}
void next_test() {
std::vector<int> next;
std::string sub_str = "abaabcaba";
assert(get_next(sub_str, next));
for (auto index = next.begin(); index != next.end(); ++index) {
std::cout << *index << std::endl;
}
}
int kmp(const std::string& str, const std::string& sub) {
std::vector<int> next;
if (sub.empty() && str.empty()) {
return 0;
}
if (sub.size() > str.size()) {
return -1;
}
if (!get_next(sub, next)) {
return -1;
}
for (auto index = next.begin(); index != next.end(); ++index) {
std::cout << *index << std::endl;
}
int ret = -1;
int i = 0;
int j = 0;
int str_len = str.size();
int sub_len = sub.size();
// 直接在while循环中使用str.size(),会出现有符号数到无符号数的转化
while (i < str_len && j < sub_len) {
if (j == -1 || str[i] == sub[j]) {
++i;
++j;
} else {
j = next[j];
}
}
if (j == sub.size()) {
ret = i - sub.size();
}
return ret;
}
int main(int argc, char** argv) {
//next_test();
std::string str = "sbxajcbbdsccshdcbusdcbjhcbuvbdfb";
std::string sub = "cbbd";
auto ret = kmp(str, sub);
std::cout << "str = " << str << std::endl;
std::cout << "sub = " << sub << std::endl;
std::cout << "pos = " << ret << std::endl;
return 0;
}