C++常用正则表达式匹配

时间:2021-04-28 05:57:26

正则表达式在匹配字符串,验证输入合法性时经常用到.C++ 11标准库中已经支持了正则表达式.以下对于几个常用的验证进行实现,以体会C++正则表达式的用法.

注意:
在C++的字符串字面量中,反斜杠本身需要转义. 因此,正则表达式里匹配数字的\d,在代码中需要写成\\d这种格式.
经典的三部曲匹配:
1.先写pattern. string pattern = {"XXXX"};
2.使用re. regex re(pattern);
3.match. bool rs = regex_match(mobile, re);
4.regex_match在匹配一次后即返回结果,如果期望进行多次匹配,需要用到regex_iterator.std::regex_iterator<std::string::iterator> rit(result.begin(), result.end(), re); 匹配结果的输出可以利用cmatch.regex_match(rit->str().c_str(), cm, re);这里面cm的内容与正则表达式pattern是对应匹配的. 具体可参考matchPriceInfo方法.

程序运行结果:
C++常用正则表达式匹配

#ifndef _COMMON_REGEX_METHOD_H_
#define _COMMON_REGEX_METHOD_H_

#include <string>
#include <fstream>
#include <sstream>
using namespace std;

// Collection of static, regex-based validation helpers.
// Every public check prints its verdict to standard output in addition to
// returning a bool; the class holds no state and is never instantiated.
class CommonRegexMethod
{
public:
    // Checks a mainland-China mobile number (optional 86 country-code prefix).
    static bool mobileMatchCheck(const string mobile);
    // Checks an 18-character Chinese resident ID: format regex plus check-digit computation.
    static bool idCardCheck(const string idCard);
    // Checks an e-mail address of the shape local@domain.tld.
    static bool emailCheck(const string email);
    // Checks a URL with an optional http:// or https:// scheme.
    static bool urlCheck(const string url);
    // Checks a dotted-quad IPv4 address.
    static bool ipV4Check(const string ipV4);
    // Checks a textual IPv6 address.
    static bool ipV6Check(const string ipV6);
    // Checks a credit-card number by prefix and length (dashes between groups are optional).
    static bool creditCardCheck(const string creditCard);
    // Checks length (6..16) and that the password contains an upper-case letter,
    // a lower-case letter, a digit and one other character.
    static bool passwordCheck(const string password);
    // Prints every entry of searchSpace[0..searchSpaceLen) that contains searchStr.
    static void vagueSearch(const string* searchSpace, unsigned int searchSpaceLen, const string searchStr);
    // Reads fileName, finds every occurrence of pattern in it and prints the capture groups.
    static void matchPriceInfo(const char* fileName, const string pattern, const unsigned int numInPattern);
private:
    // Loads a file into a heap-allocated stream; returns NULL when the file cannot be opened.
    // The caller owns the returned object.
    static ostringstream* readFileIntoString(const char* fileName);
    // Maps '0'..'9' to 0..9 and 'x'/'X' to 10; returns false for any other character.
    static bool convert(const char c, int &rs);
};

#endif // !_COMMON_REGEX_METHOD_H_
#include "CommonRegexMethod.h"
#include <regex>
#include <iostream>

bool CommonRegexMethod::mobileMatchCheck(const string mobile)
{
 //中国移动:
 // 139、138、137、136、135、134、159、158、157、150、151、152、147(数据卡)、188、187、182、183、184、178
 // 移动网络制式: 2G GSM;3G TD - SCDMA;4G TDD - LTE
 // 中国联通:
 // 130、131、132、156、155、186、185、145(数据卡)、176
 // 联通网络制式: 2G GSM;3G WCDMA;4G FDD - LTE和TDD - LTE
 // 中国电信:
 // 133、153、189、180、181、177、173(待放)
 //注意\可以用来分行,但千万不要加空格
 //严格匹配两个字符用([x][x])|([y][y]),有几种可能中间可以用|
 //不能用(xx)|(yy)
 string pattern{ "(([8][6])|([\+][8][6])|([\(][8][6][\)])|([\(][\+][8][6][\)]))?\
1(([3][8])|([3][7])|([3][6])|([3][5])|([3][4])|([5][9])|([5][8])|([5][7])|([5][0])|\
([5][1])|([5][2])|([4][7])|([8][8])|([8][7])|([8][2])|([8][3])|([8][4])|([7][8])|\
([3][0])|([3][1])|([3][2])|([5][6])|([5][5])|([8][6])|([8][5])|([4][5])|([7][6])|\
([3][3])|([5][3])|([8][9])|([8][0])|([8][1])|([7][7])|([7][3]))\\d{8}" };
 regex re(pattern);
 bool rs = regex_match(mobile, re);
 if (rs)
 {
 cout << mobile <<" is the valid mobile phone number in CMCC/CU/CT" << endl;
 }
 else
 {
 cout << mobile << " is the invalid mobile phone number in CMCC/CU/CT" << endl;
 }
 return rs;
}

bool CommonRegexMethod::idCardCheck(const string idCard)
{
 const unsigned int weightArray[17] = { 7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2 };
 const unsigned int crcFlagArray[11] = { 1, 0, 10, 9, 8, 7, 6, 5, 4, 3, 2 };
 bool rs = false;
 string pattern = {"[1-9]\\d{5}(([1][9])|([2][0]))\\d{2}(([0][1])|([0][2])|([0][3])|\
([0][4])|([0][5])|([0][6])|([0][7])|([0][8])|([0][9])|([1][0])|([1][1])|([1][2]))\
[0-3]\\d{4}(([0-9])|(X)|(x))"};
 regex re(pattern);
 rs = regex_match(idCard, re);
 if (rs)
 {
 int convertRs = -1;
 unsigned int sum = 0;
 for (int i = 0; i < 17; i++)
 {
 rs = convert(idCard[i], convertRs);
 if (rs && (-1 != convertRs))
 {
 sum += convertRs*weightArray[i];
 }
 else
 {
 rs = false;
 break;
 }
 }
 unsigned int index = sum % 11;
 rs = convert(idCard[17], convertRs);
 if (!rs)
 {
 cout << idCard << " is the invalid 2nd idCard in China" << endl;
 }
 else
 {
 if (convertRs == crcFlagArray[index])
 cout << idCard << " is the valid 2nd idCard in China" << endl;
 else
 cout << idCard << " is the invalid 2nd idCard in China" << endl;
 }
 }
 else
 {
 cout << idCard << " is the invalid 2nd idCard in China" << endl;
 }
 return rs;
}

bool CommonRegexMethod::emailCheck(const string email)
{
 bool rs = false;
 string pattern = { "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*" }; //C++中,\符号需要转义,因此写作两个\\符号
 regex re(pattern);
 rs = regex_match(email, re);
 if (rs)
 {
 cout << email << " is the valid email address" << endl;
 }
 else
 {
 cout << email << " is the invalid email address" << endl;
 }
 return rs;
}

bool CommonRegexMethod::urlCheck(const string url)
{
 bool rs = false;
 string pattern = {"(([h][t][t][p][:][/][/])|([h][t][t][p][s][:][/][/]))?(\\w+)\\.((\\w+)|(.))*"}; //在括号内()[]的特殊字符,可以不用转义,也可以用转义.括号外的一定要转义
 regex re(pattern);
 rs = regex_match(url, re);
 if (rs)
 {
 cout << url << " is the valid url address" << endl;
 }
 else
 {
 cout << url << " is the invalid url address" << endl;
 }
 return rs;
}

bool CommonRegexMethod::ipV4Check(const string ipV4)
{
 bool rs = false;
 string pattern = {"((([0-9])|([1-9]\\d{1})|([1]\\d{2})|([2][0-4]\\d{1})|([2][5][0-5]))\\.){3}\
(([0-9])|([1-9]\\d{1})|([1]\\d{2})|([2][0-4]\\d{1})|([2][5][0-5]))"};
 regex re(pattern);
 rs = regex_match(ipV4, re);
 if (rs)
 {
 cout << ipV4 << " is the valid ipv4 address" << endl;
 }
 else
 {
 cout << ipV4 << " is the invalid ipv4 address" << endl;
 }
 return rs;
}

bool CommonRegexMethod::ipV6Check(const string ipV6)
{
 bool rs = false;
 string pattern = {"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){6}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\b)\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\b))|\
(([0-9A-Fa-f]{1,4}:){0,5}:((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|\
(::([0-9A-Fa-f]{1,4}:){0,5}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|\
([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|\
(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"};
 regex re(pattern);
 rs = regex_match(ipV6, re);
 if (rs)
 {
 cout << ipV6 << " is the valid ipv6 address" << endl;
 }
 else
 {
 cout << ipV6 << " is the invalid ipv4 address" << endl;
 }
 return rs;
}

bool CommonRegexMethod::creditCardCheck(const string creditCard)
{
 bool rs = false;
 string pattern = {"((4\\d{3})|(5[1-5]\\d{2})|(6011)|(62\\d{2}))-?\\d{4}-?\\d{4}-?\\d{4}|3[4,7]\\d{13}"};
 regex re(pattern);
 rs = regex_match(creditCard, re);
 if (rs)
 {
 cout << creditCard << " is the valid creditCard address" << endl;
 }
 else
 {
 cout << creditCard << " is the invalid creditCard address" << endl;
 }
 return rs;
}

bool CommonRegexMethod::passwordCheck(const string password)
{
 bool rs = false;
 unsigned int len = password.length();
 if ((len >= 6) && (len <= 16))
 {
 string pattern1 = { "(.*)[A-Z](.*)" };
 string pattern2 = { "(.*)[a-z](.*)" };
 string pattern3 = { "(.*)[0-9](.*)" };
 string pattern4 = { "(.*)[^A-Za-z0-9](.*)" };
 regex re(pattern1);
 rs = regex_match(password, re);
 if (rs)
 {
 regex re(pattern2);
 rs = regex_match(password, re);
 if (rs)
 {
 regex re(pattern3);
 rs = regex_match(password, re);
 if (rs)
 {
 regex re(pattern4);
 rs = regex_match(password, re);
 if (rs)
 {
 rs = true;
 cout << "password valid!" << endl;
 }
 else
 {
 cout << "password invalid pattern4!" << endl;
 }
 }
 else
 {
 cout << "password invalid pattern3!" << endl;
 }
 }
 else
 {
 cout << "password invalid pattern2!" << endl;
 }
 }
 else
 {
 cout << "password invalid pattern1!" << endl;
 }
 }
 else
 {
 cout << "len invalid!" << endl;
 }

 return rs;
}

void CommonRegexMethod::vagueSearch(const string* searchSpace, unsigned int searchSpaceLen, const string searchStr)
{
 const string* oriPtr = searchSpace;
 bool rs = false;
 string prePattern = "(.*)" + searchStr + "(.*)";
 string pattern = { prePattern };
 regex re(pattern);
 for (unsigned int i = 0; i < searchSpaceLen; i++)
 {
 rs = regex_match(*searchSpace, re);
 if (rs)
 {
 cout << *searchSpace << " match the pattern:" << searchStr << endl;
 }
 searchSpace++;
 }
 searchSpace = oriPtr;
}

void CommonRegexMethod::matchPriceInfo(const char* fileName, const string pattern, const unsigned int numInPattern)
{
 ostringstream* buf = readFileIntoString(fileName);
 if (NULL != buf)
 {
 string result = buf->str();
 std::regex re(pattern);
 std::regex_iterator<std::string::iterator> rit(result.begin(), result.end(), re);
 std::regex_iterator<std::string::iterator> rend;

 while (rit != rend)
 {
 std::cmatch cm;
 regex_match(rit->str().c_str(), cm, re);
 for (int i = 0; i < cm.size(); i++)
 {
 std::cout << "CM:" << i << cm[i] << endl;
 }
 ++rit;
 }
 }
 else
 {

 }
 delete buf;
 buf = NULL;
 return;
}


// Loads a file into a newly allocated string stream.
//
// The file is consumed in 2-byte units and only the first byte of each unit
// is kept. NOTE(review): this presumably targets UTF-16LE text restricted to
// the ASCII range - confirm against the actual input files.
//
// filename : path of the file to read.
// Returns a heap-allocated stream owned by the caller (release with delete),
// or NULL when filename is null or the file cannot be opened.
ostringstream* CommonRegexMethod::readFileIntoString(const char* filename)
{
    if (nullptr == filename)
    {
        return nullptr;
    }

    ifstream fin(filename, ios::binary);
    if (!fin.is_open())
    {
        return nullptr;
    }

    ostringstream* buf = new ostringstream();
    char unit[2] = { 0, 0 };
    // Loop on the result of the read itself rather than on eof(): testing
    // eof() before reading emitted one extra garbage byte after the last unit
    // and could spin forever on a non-EOF stream error. gcount() > 0 keeps the
    // single trailing byte of a file with odd length.
    while (fin.read(unit, 2) || fin.gcount() > 0)
    {
        buf->put(unit[0]);
    }
    return buf;
}



// Translates one ID-card character into its numeric value.
//
// c  : character to translate.
// rs : receives 0..9 for '0'..'9' and 10 for 'x' or 'X'; left untouched for
//      any other character.
// Returns true when c was recognised, false otherwise.
bool CommonRegexMethod::convert(const char c, int &rs)
{
    if ((c >= '0') && (c <= '9'))
    {
        rs = c - '0';   // decimal digits are contiguous in every C++ character set
        return true;
    }
    if (('x' == c) || ('X' == c))
    {
        rs = 10;
        return true;
    }
    return false;
}

部分测试代码如下:

void testRegex()
{ CommonRegexMethod::emailCheck("jack.jack@163.com"); CommonRegexMethod::emailCheck("123@456.com"); CommonRegexMethod::emailCheck("_123@456.com"); CommonRegexMethod::emailCheck("tiantian456.com"); CommonRegexMethod::emailCheck("tiantian@456com"); CommonRegexMethod::urlCheck("http://www.sina.com"); CommonRegexMethod::urlCheck("https://www.sina.com"); CommonRegexMethod::urlCheck("http://wwwm"); CommonRegexMethod::urlCheck("http://wwwsina.com"); CommonRegexMethod::urlCheck("wwwsina.com"); CommonRegexMethod::urlCheck("www.sina.com"); CommonRegexMethod::urlCheck("htp://www.sina.com"); CommonRegexMethod::urlCheck("ht//www.sina.com"); CommonRegexMethod::ipV4Check("255"); CommonRegexMethod::ipV4Check("1"); CommonRegexMethod::ipV4Check("61"); CommonRegexMethod::ipV4Check("265"); CommonRegexMethod::ipV4Check("355"); CommonRegexMethod::ipV4Check("256"); CommonRegexMethod::ipV4Check("0"); CommonRegexMethod::ipV4Check("01"); CommonRegexMethod::ipV4Check("255."); CommonRegexMethod::ipV4Check("1."); CommonRegexMethod::ipV4Check("61."); CommonRegexMethod::ipV4Check("265."); CommonRegexMethod::ipV4Check("355."); CommonRegexMethod::ipV4Check("256."); CommonRegexMethod::ipV4Check("0."); CommonRegexMethod::ipV4Check("01."); CommonRegexMethod::ipV4Check("255.255"); CommonRegexMethod::ipV4Check("1.1"); CommonRegexMethod::ipV4Check("61.61"); CommonRegexMethod::ipV4Check("265.265"); CommonRegexMethod::ipV4Check("355.355"); CommonRegexMethod::ipV4Check("256.256"); CommonRegexMethod::ipV4Check("0.0"); CommonRegexMethod::ipV4Check("01.01"); CommonRegexMethod::ipV4Check("255.255.255.0"); CommonRegexMethod::ipV4Check("355.255.255.0"); CommonRegexMethod::ipV4Check("55.255.255.0"); CommonRegexMethod::ipV4Check("255.25.255.0"); CommonRegexMethod::ipV4Check("0.255.255.0"); CommonRegexMethod::ipV4Check(". . . 
."); CommonRegexMethod::ipV4Check("什么情况?"); CommonRegexMethod::ipV4Check("255.2.5.255.0"); CommonRegexMethod::ipV4Check("1.25.35.0"); CommonRegexMethod::ipV6Check("ABCD:CDCD:FFEE:3232:EDED:FFFF:4353:1234"); CommonRegexMethod::ipV6Check("ABCD:CDCD:FYEE:3232:EDED:FFFF:4353:1234"); CommonRegexMethod::ipV6Check("8888::8:800:7777:4444"); CommonRegexMethod::ipV6Check("::FFFF:129.24.6.7"); CommonRegexMethod::ipV6Check("FEDC::7654:3210::BA98:7654:3210"); CommonRegexMethod::ipV6Check("FEDC:BA98:7654:3210"); CommonRegexMethod::ipV6Check("::"); CommonRegexMethod::ipV6Check("什么情况?"); CommonRegexMethod::ipV6Check("255.2.5.255.0"); CommonRegexMethod::ipV6Check("1.25.35.0"); CommonRegexMethod::creditCardCheck("4534343421211212"); CommonRegexMethod::creditCardCheck("5534343421211212"); CommonRegexMethod::creditCardCheck("8534343421211212"); CommonRegexMethod::creditCardCheck("6234343421211212"); CommonRegexMethod::creditCardCheck("343434342121112"); CommonRegexMethod::creditCardCheck("453434342121112"); CommonRegexMethod::creditCardCheck("55343434212311212"); CommonRegexMethod::creditCardCheck("853434342211212"); CommonRegexMethod::creditCardCheck("623433421211212"); CommonRegexMethod::creditCardCheck("3434343421212112"); }