Linux中自带正则表达式应用举例

时间:2022-10-26 14:50:02

 环境:Fedora12, C程序:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the substring of s described by pmatch.
 *
 * Copies the bytes in the half-open range [pmatch->rm_so, pmatch->rm_eo) of s
 * into a function-local static buffer and returns a pointer to it.
 *
 * The returned string is overwritten by the next call, so use or copy it
 * before calling getsubstr() again. Matches longer than the buffer are
 * truncated to sizeof(buf) - 1 bytes; an unset match (rm_so == -1) or an
 * empty/inverted range yields the empty string.
 */
char* getsubstr(char *s, regmatch_t *pmatch)
{
	static char buf[100] = {0};
	size_t len = 0;

	if(pmatch->rm_so >= 0 && pmatch->rm_eo > pmatch->rm_so)
		len = (size_t)(pmatch->rm_eo - pmatch->rm_so);

	/* Clamp so the copy never runs past buf and a terminating NUL remains. */
	if(len > sizeof(buf) - 1)
		len = sizeof(buf) - 1;

	memset(buf, 0, sizeof(buf));
	/* Skip the copy for len == 0: s + rm_so may not be a valid pointer then. */
	if(len > 0)
		memcpy(buf, s + pmatch->rm_so, len);

	return buf;
}

/*
 * Demo: compile a POSIX extended regular expression, run it once against a
 * fixed string, and print the whole match followed by every captured
 * sub-pattern as "[start, end): text".
 *
 * Returns 0 when the search ran (match or no match), 1 when the pattern
 * could not be compiled or regexec() reported an error.
 */
int main(int argc, char **argv)
{
	int status;
	size_t i;
	int rc = 0;
	int cflags = REG_EXTENDED;
	regmatch_t pmatch[5];
	const size_t nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
	regex_t reg;
	const char *pattern = "([A-Z]+)([a-z]+)ID[0-9]+@([a-z]+)\\.([a-z]+)";	// regular expression
	char buf[] = "COMEdavID2012@gmail.com";		// string to search
	char errbuf[128];

	(void)argc;
	(void)argv;

	status = regcomp(&reg, pattern, cflags);
	if(status != 0)
	{
		/* reg is not usable after a failed regcomp(), so no regfree() here. */
		regerror(status, &reg, errbuf, sizeof(errbuf));
		fprintf(stderr, "regcomp failed: %s\n", errbuf);
		return 1;
	}

	status = regexec(&reg, buf, nmatch, pmatch, 0);
	if(status == REG_NOMATCH)
		printf("No Match\n");
	else if(status != 0)
	{
		/* Any other non-zero status is an error; pmatch holds no valid data. */
		regerror(status, &reg, errbuf, sizeof(errbuf));
		fprintf(stderr, "regexec failed: %s\n", errbuf);
		rc = 1;
	}
	else
	{
		printf("Match:\n");
		for(i = 0; i < nmatch; i++)
		{
			/* rm_so == -1 marks a sub-pattern that did not take part in the match. */
			if(pmatch[i].rm_so == -1)
				continue;
			char *p = getsubstr(buf, &pmatch[i]);
			/* regoff_t is not guaranteed to be int, so cast for %d. */
			printf("[%d, %d): %s\n", (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, p);
		}
	}
	regfree(&reg);

	return rc;
}


编译运行:

[zcm@t #52]$make
gcc    -c -o a.o a.c
gcc  -o a a.o
[zcm@t #53]$./a
Match:
[0, 23): COMEdavID2012@gmail.com
[0, 4): COME
[4, 7): dav
[14, 19): gmail
[20, 23): com
[zcm@t #54]$


注意

pmatch[0]保存整个正则表达式所匹配子串的起止偏移(rm_so、rm_eo)

pmatch[1]保存子模式1(第1个括号分组)所匹配子串的起止偏移

pmatch[2]保存子模式2(第2个括号分组)所匹配子串的起止偏移

......

 

思考

regexec每次调用只返回从搜索起点开始的第一个匹配结果。如果想从待搜索的字符串中找出所有匹配的结果(假设多于1个),怎么办呢?------- 循环调用regexec,每次把搜索起点后移到上一次匹配的结束位置,代码如下:

/*
	Posix正则表达式应用:循环调用regexec(),以获得多个匹配的结果
*/
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the substring of s described by pmatch.
 *
 * Copies the bytes in the half-open range [pmatch->rm_so, pmatch->rm_eo) of s
 * into a function-local static buffer and returns a pointer to it.
 *
 * The returned string is overwritten by the next call, so use or copy it
 * before calling getsubstr() again. Matches longer than the buffer are
 * truncated to sizeof(buf) - 1 bytes; an unset match (rm_so == -1) or an
 * empty/inverted range yields the empty string.
 */
char* getsubstr(char *s, regmatch_t *pmatch)
{
	static char buf[100] = {0};
	size_t len = 0;

	if(pmatch->rm_so >= 0 && pmatch->rm_eo > pmatch->rm_so)
		len = (size_t)(pmatch->rm_eo - pmatch->rm_so);

	/* Clamp so the copy never runs past buf and a terminating NUL remains. */
	if(len > sizeof(buf) - 1)
		len = sizeof(buf) - 1;

	memset(buf, 0, sizeof(buf));
	/* Skip the copy for len == 0: s + rm_so may not be a valid pointer then. */
	if(len > 0)
		memcpy(buf, s + pmatch->rm_so, len);

	return buf;
}

/*
 * Demo: find every match of a POSIX extended regular expression in a fixed
 * string by calling regexec() repeatedly, restarting each search where the
 * previous match ended.
 *
 * For each search it prints the remaining text, then either "No MatchN" or
 * "MatchN:" followed by the whole match and its sub-patterns. The printed
 * offsets are relative to the current search start (pSrc), not to the start
 * of buf.
 *
 * Returns 0 when the search loop completed, 1 when the pattern could not be
 * compiled or regexec() reported an error.
 */
int main(int argc, char **argv)
{
	int status;
	size_t i;
	int rc = 0;
	int cflags = REG_EXTENDED;
	int eflags = 0;
	regmatch_t pmatch[10];
	const size_t nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
	regex_t reg;
	// Alternative pattern with more sub-patterns:
	//const char *pattern = "([A-Z]+)([a-z]+)(ID|DB)[0-9]+@([a-z]+)\\.([a-z]+)";
	const char *pattern = "[[:upper:]]+([[:lower:]]+)";	// regular expression
	char buf[] = "c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com";		// string to search
	char *pSrc = buf, *p = NULL;
	int mCount = 1;											// ordinal of the current search
	char errbuf[128];

	(void)argc;
	(void)argv;

	status = regcomp(&reg, pattern, cflags);				// compile the regular expression
	if(status != 0)
	{
		/* reg is not usable after a failed regcomp(), so no regfree() here. */
		regerror(status, &reg, errbuf, sizeof(errbuf));
		fprintf(stderr, "regcomp failed: %s\n", errbuf);
		return 1;
	}

	do														// search repeatedly for matches
	{
		printf("pSrc = %s\n", pSrc);
		status = regexec(&reg, pSrc, nmatch, pmatch, eflags);
		if(status == REG_NOMATCH)							// nothing more to find
		{
			printf("No Match%d\n", mCount);
			break;
		}
		if(status != 0)
		{
			/* Any other non-zero status is an error; pmatch holds no valid data. */
			regerror(status, &reg, errbuf, sizeof(errbuf));
			fprintf(stderr, "regexec failed: %s\n", errbuf);
			rc = 1;
			break;
		}

		printf("Match%d:\n", mCount);
		for(i = 0; i < nmatch; i++)							// print this match and its sub-patterns
		{
			/*
			 * rm_so == -1 marks a sub-pattern that did not take part in the
			 * match; later sub-patterns may still be set, so skip rather
			 * than stop.
			 */
			if(pmatch[i].rm_so == -1)
				continue;
			p = getsubstr(pSrc, &pmatch[i]);
			/* regoff_t is not guaranteed to be int, so cast for %d. */
			printf("pmatch[%d] = [%d, %d): %s\n", (int)i, (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, p);
		}
		putchar('\n');

		/*
		 * Move the search start past this match. An empty match at offset 0
		 * would leave pSrc unchanged and loop forever, so step one character
		 * forward in that case (unless already at the end of the string).
		 */
		if(pmatch[0].rm_eo > 0)
			pSrc += pmatch[0].rm_eo;
		else if(*pSrc != '\0')
			pSrc++;

		/* Later searches no longer start at the beginning of the line. */
		eflags = REG_NOTBOL;
		mCount++;
	}while(*pSrc != '\0');

	regfree(&reg);

	return rc;
}
编译运行:

[zcm@t #157]$make
gcc    -c -o a2.o a2.c
gcc  -o a2 a2.o
[zcm@t #158]$./a2
pSrc = c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com
Match1:
pmatch[0] = [2, 9): COMEdav
pmatch[1] = [6, 9): dav

pSrc = DB2012@gmail.com ZHOUcimingID2030@sohu.com
Match2:
pmatch[0] = [17, 27): ZHOUciming
pmatch[1] = [21, 27): ciming

pSrc = ID2030@sohu.com
No Match3
[zcm@t #159]$