使用C语言解决字符串匹配问题的方法

时间:2022-04-22 13:22:46

最常想到的方法是使用KMP字符串匹配算法:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
int get_nextval(char *pattern, int next[])
{
  //get the next value of the pattern
  int i = 0, j = -1;
  next[0] = -1;
  int patlen = strlen(pattern);
  while ( i < patlen - 1){
    if ( j == -1 || pattern[i] == pattern[j]){
      ++i;
      ++j;
      if (pattern[i] != pattern[j])
        next[i] = j;
      else
        next[i] = next[j];
    }
    else
      j = next[j];
    }
 
  return(0);
}
 
int kmpindex(char *target, char *pattern, int pos)
{
  int tari = pos, pati = 0;
  int tarlen = strlen(target), patlen = strlen(pattern);
  int *next = (int *)malloc(patlen * sizeof(int));
  get_nextval(pattern, next);
  while ( tari < tarlen && pati < patlen ){
    if (pati == -1 ||target[tari] == pattern[pati]){
      ++tari;
      ++pati;
      }else{
        pati = next[pati];
      }
  
if(next != NULL) free(next);
next = NULL;
if (pati == patlen)
  return tari - pati;
else
  return -1;
}
 
 
int main()
{
  char target[50], pattern[50];
  printf("imput the target:\n" );
  scanf("%s",target);
  printf("imput the pattern:\n" );
  scanf("%s",pattern);
  int ans = kmpindex(target,pattern,0);
  if (ans == -1)
    printf("error\n");
  else
    printf("index:%d\n",ans);
  return 0;
}


练习题
    题目描述: 
        读入数据string[ ],然后读入一个短字符串。要求查找string[ ]中和短字符串的所有匹配,输出行号、匹配字符串。匹配时不区分大小写,并且可以有一个用中括号表示的模式匹配。如“aa[123]bb”,就是说aa1bb、aa2bb、aa3bb都算匹配。 
    输入: 
    输入有多组数据。 
    每组数据第一行输入n(1<=n<=1000),从第二行开始输入n个字符串(不含空格),接下来输入一个匹配字符串。 
    输出: 
    输出匹配到的字符串的行号和该字符串(匹配时不区分大小写)。 
    样例输入: 
    4 
    Aab 
    a2B 
    ab 
    ABB 
    a[a2b]b 
    样例输出: 
    1 Aab 
    2 a2B 
    4 ABB 


ac代码

 

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  
#define MAX 1001
#define LEN 101
  
struct str
{
  char name[101];
};
  
int main()
{
  struct str strs[MAX];
  struct str t[LEN];
  int i, n, len, j, k, left, right, count, flag;
  char text[LEN], newtext[LEN];
  
  while (scanf("%d", &n) != EOF) {
    // 接收数据
    getchar();
    for (i = 0; i < n; i ++) {
      scanf("%s", strs[i].name);
    }
  
    // 接收文本串
    getchar();
    gets(text);
    len = strlen(text);
  
    for (i = left = right = 0; i < len; i ++) {
      if (text[i] == '[') {
        left = i;
      } else if (text[i] == ']') {
        right = i;
        break;
      }
    }
    count = right - left - 1;
  
    if (count <= 0) {  // 没有正则匹配
      for (i = j = 0; i < len; i ++) {
        if (text[i] != '[' && text[i] != ']') {
          newtext[j ++] = text[i];
        }
      }
      newtext[j] = '\0';
      for (i = 0; i < n; i ++) {
        if (strcasecmp(strs[i].name, newtext) == 0) {
          printf("%d %s\n", i + 1, strs[i].name);
        }
      }
    }else { // 需要正则匹配
      for (j = 1, k = 0; j <= count; j ++, k ++) { // 构建文本数组
        memset(t[k].name, '\0', sizeof(t[k].name));
        for (i = 0; i < left; i ++) {
          t[k].name[i] = text[i];
        }
        t[k].name[i] = text[left + j];
        strcat(t[k].name, text + right + 1);  
      }  
      
      // 正则匹配 
      for (i = 0; i < n; i ++) {
        for (j = flag = 0; j < count; j ++) {
          if (strcasecmp(strs[i].name, t[j].name) == 0) {
            flag = 1;
            break;
          }
        }
        if (flag) {
          printf("%d %s\n", i + 1, strs[i].name);
        }
      }
    }
  
  }
  
  return 0;
}

    /**************************************************************
        Problem: 1165
        User: wangzhengyi
        Language: C
        Result: Accepted
        Time:0 ms
        Memory:948 kb
    ****************************************************************/