strstr()表示非null终止的字符串

时间:2021-11-05 20:40:56

How do I do the in-place equivalent of strstr() for a counted string (i.e. not null-terminated) in C?

如何在C中对已计数的字符串(即非空终止)执行strstr()的就地等效操作?

3 个解决方案

#1


5  

If you're afraid of O(m*n) behaviour - basically, you needn't, such cases don't occur naturally - here's a KMP implementation I had lying around which I've modified to take the length of the haystack. Also a wrapper. If you want to do repeated searches, write your own and reuse the borders array.

如果你害怕O(m * n)行为 - 基本上,你不需要,这种情况不会自然发生 - 这是我所说的KMP实现,我已经修改了它以占用大海捞针的长度。也是一个包装。如果您想重复搜索,请编写自己的搜索并重复使用borders数组。

No guarantees for bug-freeness, but it seems to still work.

不保证无错误,但它似乎仍然有效。

int *kmp_borders(char *needle, size_t nlen){
    if (!needle) return NULL;
    int i, j, *borders = malloc((nlen+1)*sizeof(*borders));
    if (!borders) return NULL;
    i = 0;
    j = -1;
    borders[i] = j;
    while((size_t)i < nlen){
        while(j >= 0 && needle[i] != needle[j]){
            j = borders[j];
        }
        ++i;
        ++j;
        borders[i] = j;
    }
    return borders;
}

char *kmp_search(char *haystack, size_t haylen, char *needle, size_t nlen, int *borders){
    size_t max_index = haylen-nlen, i = 0, j = 0;
    while(i <= max_index){
        while(j < nlen && *haystack && needle[j] == *haystack){
            ++j;
            ++haystack;
        }
        if (j == nlen){
            return haystack-nlen;
        }
        if (!(*haystack)){
            return NULL;
        }
        if (j == 0){
            ++haystack;
            ++i;
        } else {
            do{
                i += j - (size_t)borders[j];
                j = borders[j];
            }while(j > 0 && needle[j] != *haystack);
        }
    }
    return NULL;
}

char *sstrnstr(char *haystack, char *needle, size_t haylen){
    if (!haystack || !needle){
        return NULL;
    }
    size_t nlen = strlen(needle);
    if (haylen < nlen){
        return NULL;
    }
    int *borders = kmp_borders(needle, nlen);
    if (!borders){
        return NULL;
    }
    char *match = kmp_search(haystack, haylen, needle, nlen, borders);
    free(borders);
    return match;
}

#2


5  

See if the function below works for you. I haven't tested it thoroughly, so I would suggest you do so.

看看下面的功能是否适合您。我没有彻底测试过,所以我建议你这样做。

char *sstrstr(char *haystack, char *needle, size_t length)
{
    size_t needle_length = strlen(needle);
    size_t i;

    for (i = 0; i < length; i++)
    {
        if (i + needle_length > length)
        {
            return NULL;
        }

        if (strncmp(&haystack[i], needle, needle_length) == 0)
        {
            return &haystack[i];
        }
    }
    return NULL;
}

#3


2  

I just came across this and I'd like to share my implementation. It think it quite fast a I don't have any subcalls.

我刚刚遇到这个,我想分享我的实施。它认为它很快我没有任何子句。

It returns the index in the haystack where the needle is found or -1 if it was not found.

它返回找到针的haystack中的索引,如果没有找到则返回-1。

/* binary search in memory */
int memsearch(const char *hay, int haysize, const char *needle, int needlesize) {
    int haypos, needlepos;
    haysize -= needlesize;
    for (haypos = 0; haypos <= haysize; haypos++) {
        for (needlepos = 0; needlepos < needlesize; needlepos++) {
            if (hay[haypos + needlepos] != needle[needlepos]) {
                // Next character in haystack.
                break;
            }
        }
        if (needlepos == needlesize) {
            return haypos;
        }
    }
    return -1;
}

#1


5  

If you're afraid of O(m*n) behaviour - basically, you needn't, such cases don't occur naturally - here's a KMP implementation I had lying around which I've modified to take the length of the haystack. Also a wrapper. If you want to do repeated searches, write your own and reuse the borders array.

如果你害怕O(m * n)行为 - 基本上,你不需要,这种情况不会自然发生 - 这是我所说的KMP实现,我已经修改了它以占用大海捞针的长度。也是一个包装。如果您想重复搜索,请编写自己的搜索并重复使用borders数组。

No guarantees for bug-freeness, but it seems to still work.

不保证无错误,但它似乎仍然有效。

int *kmp_borders(char *needle, size_t nlen){
    if (!needle) return NULL;
    int i, j, *borders = malloc((nlen+1)*sizeof(*borders));
    if (!borders) return NULL;
    i = 0;
    j = -1;
    borders[i] = j;
    while((size_t)i < nlen){
        while(j >= 0 && needle[i] != needle[j]){
            j = borders[j];
        }
        ++i;
        ++j;
        borders[i] = j;
    }
    return borders;
}

char *kmp_search(char *haystack, size_t haylen, char *needle, size_t nlen, int *borders){
    size_t max_index = haylen-nlen, i = 0, j = 0;
    while(i <= max_index){
        while(j < nlen && *haystack && needle[j] == *haystack){
            ++j;
            ++haystack;
        }
        if (j == nlen){
            return haystack-nlen;
        }
        if (!(*haystack)){
            return NULL;
        }
        if (j == 0){
            ++haystack;
            ++i;
        } else {
            do{
                i += j - (size_t)borders[j];
                j = borders[j];
            }while(j > 0 && needle[j] != *haystack);
        }
    }
    return NULL;
}

char *sstrnstr(char *haystack, char *needle, size_t haylen){
    if (!haystack || !needle){
        return NULL;
    }
    size_t nlen = strlen(needle);
    if (haylen < nlen){
        return NULL;
    }
    int *borders = kmp_borders(needle, nlen);
    if (!borders){
        return NULL;
    }
    char *match = kmp_search(haystack, haylen, needle, nlen, borders);
    free(borders);
    return match;
}

#2


5  

See if the function below works for you. I haven't tested it thoroughly, so I would suggest you do so.

看看下面的功能是否适合您。我没有彻底测试过,所以我建议你这样做。

char *sstrstr(char *haystack, char *needle, size_t length)
{
    size_t needle_length = strlen(needle);
    size_t i;

    for (i = 0; i < length; i++)
    {
        if (i + needle_length > length)
        {
            return NULL;
        }

        if (strncmp(&haystack[i], needle, needle_length) == 0)
        {
            return &haystack[i];
        }
    }
    return NULL;
}

#3


2  

I just came across this and I'd like to share my implementation. It think it quite fast a I don't have any subcalls.

我刚刚遇到这个,我想分享我的实施。它认为它很快我没有任何子句。

It returns the index in the haystack where the needle is found or -1 if it was not found.

它返回找到针的haystack中的索引,如果没有找到则返回-1。

/* binary search in memory */
int memsearch(const char *hay, int haysize, const char *needle, int needlesize) {
    int haypos, needlepos;
    haysize -= needlesize;
    for (haypos = 0; haypos <= haysize; haypos++) {
        for (needlepos = 0; needlepos < needlesize; needlepos++) {
            if (hay[haypos + needlepos] != needle[needlepos]) {
                // Next character in haystack.
                break;
            }
        }
        if (needlepos == needlesize) {
            return haypos;
        }
    }
    return -1;
}