正则表达式在匹配字符串,验证输入合法性时经常用到.C++ 11标准库中已经支持了正则表达式.以下对于几个常用的验证进行实现,以体会C++正则表达式的用法.
注意:
在C++的字符串字面量中,反斜杠本身需要转义. 因此,匹配数字的\d,需要写成\\d这种格式.
经典的三部曲匹配:
1.先写pattern. string pattern = {"XXXX"};
2.使用re. regex re(pattern);
3.match. bool rs = regex_match(mobile, re);
4.regex_match在匹配一次后即返回结果,如果期望进行多次匹配,需要用到regex_iterator: std::regex_iterator<std::string::iterator> rit(result.begin(), result.end(), re);
匹配结果的输出可以利用cmatch.regex_match(rit->str().c_str(), cm, re);
这里面cm的内容与正则表达式pattern是对应匹配的. 具体可参考matchPriceInfo方法.
程序运行结果:
#ifndef _COMMON_REGEX_METHOD_H_
#define _COMMON_REGEX_METHOD_H_
#include <string>
#include <fstream>
#include <sstream>
using namespace std;
// Collection of static helpers that check strings against std::regex patterns.
// Every public check prints a human-readable verdict to standard output in
// addition to returning a bool. The class holds no state; all members are static.
class CommonRegexMethod
{
public:
// Checks `mobile` against a pattern of an optional 86 country code (with or
// without '+' / parentheses), a leading 1, a two-digit carrier prefix and
// eight more digits; prints the verdict.
static bool mobileMatchCheck(const string mobile);
// Checks an 18-character ID string: a regex format check followed by a
// weighted mod-11 check-digit comparison; prints the verdict.
static bool idCardCheck(const string idCard);
// Checks `email` against a local-part@domain.tld style pattern; prints the verdict.
static bool emailCheck(const string email);
// Checks `url` against: optional "http://" or "https://", word characters,
// a dot, then any remaining characters; prints the verdict.
static bool urlCheck(const string url);
// Checks for four dot-separated decimal octets in the range 0-255; prints the verdict.
static bool ipV4Check(const string ipV4);
// Checks `ipV6` against a set of alternatives covering full, "::"-compressed
// and IPv4-suffixed notations; prints the verdict.
static bool ipV6Check(const string ipV6);
// Checks for a 16-digit number (optional hyphens between groups of four)
// starting with 4, 51-55, 6011 or 62, or a 15-digit number starting with 3;
// prints the verdict.
static bool creditCardCheck(const string creditCard);
// Requires a length of 6..16 and at least one upper-case letter, one
// lower-case letter, one digit and one other character; prints which rule
// failed, or that the password is valid.
static bool passwordCheck(const string password);
// Prints every one of the `searchSpaceLen` strings starting at `searchSpace`
// that fully matches "(.*)" + searchStr + "(.*)". `searchStr` is inserted into
// the regex as-is (not escaped).
static void vagueSearch(const string* searchSpace, unsigned int searchSpaceLen, const string searchStr);
// Reads `fileName` via readFileIntoString, iterates over all matches of
// `pattern` in the content and prints the capture groups of each match.
// `numInPattern` is not used by the definition visible in this file.
static void matchPriceInfo(const char* fileName, const string pattern, const unsigned int numInPattern);
private:
// Returns a heap-allocated stream holding the file content, or NULL when the
// file cannot be opened. The caller deletes the returned object.
static ostringstream* readFileIntoString(const char* fileName);
// Maps '0'..'9' to 0..9 and 'x'/'X' to 10, storing the value in `rs`.
// Returns false (leaving `rs` untouched) for any other character.
static bool convert(const char c, int &rs);
};
#endif // !_COMMON_REGEX_METHOD_H_
#include "CommonRegexMethod.h"
#include <regex>
#include <iostream>
bool CommonRegexMethod::mobileMatchCheck(const string mobile)
{
//中国移动:
// 139、138、137、136、135、134、159、158、157、150、151、152、147(数据卡)、188、187、182、183、184、178
// 移动网络制式: 2G GSM;3G TD - SCDMA;4G TDD - LTE
// 中国联通:
// 130、131、132、156、155、186、185、145(数据卡)、176
// 联通网络制式: 2G GSM;3G WCDMA;4G FDD - LTE和TDD - LTE
// 中国电信:
// 133、153、189、180、181、177、173(待放)
//注意\可以用来分行,但千万不要加空格
//严格匹配两个字符用([x][x])|([y][y]),有几种可能中间可以用|
//不能用(xx)|(yy)
string pattern{ "(([8][6])|([\+][8][6])|([\(][8][6][\)])|([\(][\+][8][6][\)]))?\
1(([3][8])|([3][7])|([3][6])|([3][5])|([3][4])|([5][9])|([5][8])|([5][7])|([5][0])|\
([5][1])|([5][2])|([4][7])|([8][8])|([8][7])|([8][2])|([8][3])|([8][4])|([7][8])|\
([3][0])|([3][1])|([3][2])|([5][6])|([5][5])|([8][6])|([8][5])|([4][5])|([7][6])|\
([3][3])|([5][3])|([8][9])|([8][0])|([8][1])|([7][7])|([7][3]))\\d{8}" };
regex re(pattern);
bool rs = regex_match(mobile, re);
if (rs)
{
cout << mobile <<" is the valid mobile phone number in CMCC/CU/CT" << endl;
}
else
{
cout << mobile << " is the invalid mobile phone number in CMCC/CU/CT" << endl;
}
return rs;
}
bool CommonRegexMethod::idCardCheck(const string idCard)
{
const unsigned int weightArray[17] = { 7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2 };
const unsigned int crcFlagArray[11] = { 1, 0, 10, 9, 8, 7, 6, 5, 4, 3, 2 };
bool rs = false;
string pattern = {"[1-9]\\d{5}(([1][9])|([2][0]))\\d{2}(([0][1])|([0][2])|([0][3])|\
([0][4])|([0][5])|([0][6])|([0][7])|([0][8])|([0][9])|([1][0])|([1][1])|([1][2]))\
[0-3]\\d{4}(([0-9])|(X)|(x))"};
regex re(pattern);
rs = regex_match(idCard, re);
if (rs)
{
int convertRs = -1;
unsigned int sum = 0;
for (int i = 0; i < 17; i++)
{
rs = convert(idCard[i], convertRs);
if (rs && (-1 != convertRs))
{
sum += convertRs*weightArray[i];
}
else
{
rs = false;
break;
}
}
unsigned int index = sum % 11;
rs = convert(idCard[17], convertRs);
if (!rs)
{
cout << idCard << " is the invalid 2nd idCard in China" << endl;
}
else
{
if (convertRs == crcFlagArray[index])
cout << idCard << " is the valid 2nd idCard in China" << endl;
else
cout << idCard << " is the invalid 2nd idCard in China" << endl;
}
}
else
{
cout << idCard << " is the invalid 2nd idCard in China" << endl;
}
return rs;
}
bool CommonRegexMethod::emailCheck(const string email)
{
bool rs = false;
string pattern = { "\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*" }; //C++中,\符号需要转义,因此写作两个\\符号
regex re(pattern);
rs = regex_match(email, re);
if (rs)
{
cout << email << " is the valid email address" << endl;
}
else
{
cout << email << " is the invalid email address" << endl;
}
return rs;
}
bool CommonRegexMethod::urlCheck(const string url)
{
bool rs = false;
string pattern = {"(([h][t][t][p][:][/][/])|([h][t][t][p][s][:][/][/]))?(\\w+)\\.((\\w+)|(.))*"}; //在括号内()[]的特殊字符,可以不用转义,也可以用转义.括号外的一定要转义
regex re(pattern);
rs = regex_match(url, re);
if (rs)
{
cout << url << " is the valid url address" << endl;
}
else
{
cout << url << " is the invalid url address" << endl;
}
return rs;
}
bool CommonRegexMethod::ipV4Check(const string ipV4)
{
bool rs = false;
string pattern = {"((([0-9])|([1-9]\\d{1})|([1]\\d{2})|([2][0-4]\\d{1})|([2][5][0-5]))\\.){3}\
(([0-9])|([1-9]\\d{1})|([1]\\d{2})|([2][0-4]\\d{1})|([2][5][0-5]))"};
regex re(pattern);
rs = regex_match(ipV4, re);
if (rs)
{
cout << ipV4 << " is the valid ipv4 address" << endl;
}
else
{
cout << ipV4 << " is the invalid ipv4 address" << endl;
}
return rs;
}
bool CommonRegexMethod::ipV6Check(const string ipV6)
{
bool rs = false;
string pattern = {"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|\
(([0-9A-Fa-f]{1,4}:){6}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\b)\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\b))|\
(([0-9A-Fa-f]{1,4}:){0,5}:((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|\
(::([0-9A-Fa-f]{1,4}:){0,5}((\\b((25[0-5])|(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b)\\.){3}(\\b((25[0-5])|\
(1\\d{2})|(2[0-4]\\d)|(\\d{1,2}))\\b))|\
([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|\
(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"};
regex re(pattern);
rs = regex_match(ipV6, re);
if (rs)
{
cout << ipV6 << " is the valid ipv6 address" << endl;
}
else
{
cout << ipV6 << " is the invalid ipv4 address" << endl;
}
return rs;
}
bool CommonRegexMethod::creditCardCheck(const string creditCard)
{
bool rs = false;
string pattern = {"((4\\d{3})|(5[1-5]\\d{2})|(6011)|(62\\d{2}))-?\\d{4}-?\\d{4}-?\\d{4}|3[4,7]\\d{13}"};
regex re(pattern);
rs = regex_match(creditCard, re);
if (rs)
{
cout << creditCard << " is the valid creditCard address" << endl;
}
else
{
cout << creditCard << " is the invalid creditCard address" << endl;
}
return rs;
}
bool CommonRegexMethod::passwordCheck(const string password)
{
bool rs = false;
unsigned int len = password.length();
if ((len >= 6) && (len <= 16))
{
string pattern1 = { "(.*)[A-Z](.*)" };
string pattern2 = { "(.*)[a-z](.*)" };
string pattern3 = { "(.*)[0-9](.*)" };
string pattern4 = { "(.*)[^A-Za-z0-9](.*)" };
regex re(pattern1);
rs = regex_match(password, re);
if (rs)
{
regex re(pattern2);
rs = regex_match(password, re);
if (rs)
{
regex re(pattern3);
rs = regex_match(password, re);
if (rs)
{
regex re(pattern4);
rs = regex_match(password, re);
if (rs)
{
rs = true;
cout << "password valid!" << endl;
}
else
{
cout << "password invalid pattern4!" << endl;
}
}
else
{
cout << "password invalid pattern3!" << endl;
}
}
else
{
cout << "password invalid pattern2!" << endl;
}
}
else
{
cout << "password invalid pattern1!" << endl;
}
}
else
{
cout << "len invalid!" << endl;
}
return rs;
}
void CommonRegexMethod::vagueSearch(const string* searchSpace, unsigned int searchSpaceLen, const string searchStr)
{
const string* oriPtr = searchSpace;
bool rs = false;
string prePattern = "(.*)" + searchStr + "(.*)";
string pattern = { prePattern };
regex re(pattern);
for (unsigned int i = 0; i < searchSpaceLen; i++)
{
rs = regex_match(*searchSpace, re);
if (rs)
{
cout << *searchSpace << " match the pattern:" << searchStr << endl;
}
searchSpace++;
}
searchSpace = oriPtr;
}
void CommonRegexMethod::matchPriceInfo(const char* fileName, const string pattern, const unsigned int numInPattern)
{
ostringstream* buf = readFileIntoString(fileName);
if (NULL != buf)
{
string result = buf->str();
std::regex re(pattern);
std::regex_iterator<std::string::iterator> rit(result.begin(), result.end(), re);
std::regex_iterator<std::string::iterator> rend;
while (rit != rend)
{
std::cmatch cm;
regex_match(rit->str().c_str(), cm, re);
for (int i = 0; i < cm.size(); i++)
{
std::cout << "CM:" << i << cm[i] << endl;
}
++rit;
}
}
else
{
}
delete buf;
buf = NULL;
return;
}
// Loads a file into a newly allocated ostringstream, keeping the first byte
// of every 2-byte unit.
//
// NOTE(review): the 2-byte stride presumably targets UTF-16LE text whose
// characters fit in one byte — confirm against the files actually used.
//
// Fixes relative to the previous version:
//   * The loop was driven by `!fin.eof()`, so after the last successful read
//     one more iteration ran and appended a stale/indeterminate character.
//     The loop is now driven by the result of read() itself.
//   * Bytes were read into an uninitialized wchar_t (whose size and byte
//     order are platform-dependent) and then narrowed; a plain 2-byte buffer
//     gives the same first-byte result on little-endian platforms and a
//     well-defined one everywhere.
//
// @param filename  path of the file to read; NULL yields NULL.
// @return heap-allocated stream owned by the caller (release with delete),
//         or NULL when the file cannot be opened.
ostringstream* CommonRegexMethod::readFileIntoString(const char* filename)
{
    if (NULL == filename)
    {
        return NULL;
    }

    ifstream fin(filename, ios::binary);
    if (!fin.is_open())
    {
        return NULL;
    }

    ostringstream* buf = new ostringstream();
    char unit[2] = { 0, 0 };
    while (fin.read(unit, 2))
    {
        buf->put(unit[0]);
    }
    // A file with an odd byte count leaves one trailing byte in `unit`; keep
    // it, as the previous implementation did.
    if (fin.gcount() == 1)
    {
        buf->put(unit[0]);
    }
    return buf;
}
// Converts one ID-card character to its numeric value.
//
// '0'..'9' map to 0..9; 'x' and 'X' map to 10.
//
// @param c   character to convert.
// @param rs  receives the value on success; left untouched on failure.
// @return true when `c` was recognised, false otherwise.
bool CommonRegexMethod::convert(const char c, int &rs)
{
    if ((c >= '0') && (c <= '9'))
    {
        // Decimal digit characters are guaranteed to be contiguous.
        rs = c - '0';
        return true;
    }
    if (('x' == c) || ('X' == c))
    {
        rs = 10;
        return true;
    }
    return false;
}
部分测试代码如下:
void testRegex()
{ CommonRegexMethod::emailCheck("jack.jack@163.com"); CommonRegexMethod::emailCheck("123@456.com"); CommonRegexMethod::emailCheck("_123@456.com"); CommonRegexMethod::emailCheck("tiantian456.com"); CommonRegexMethod::emailCheck("tiantian@456com"); CommonRegexMethod::urlCheck("http://www.sina.com"); CommonRegexMethod::urlCheck("https://www.sina.com"); CommonRegexMethod::urlCheck("http://wwwm"); CommonRegexMethod::urlCheck("http://wwwsina.com"); CommonRegexMethod::urlCheck("wwwsina.com"); CommonRegexMethod::urlCheck("www.sina.com"); CommonRegexMethod::urlCheck("htp://www.sina.com"); CommonRegexMethod::urlCheck("ht//www.sina.com"); CommonRegexMethod::ipV4Check("255"); CommonRegexMethod::ipV4Check("1"); CommonRegexMethod::ipV4Check("61"); CommonRegexMethod::ipV4Check("265"); CommonRegexMethod::ipV4Check("355"); CommonRegexMethod::ipV4Check("256"); CommonRegexMethod::ipV4Check("0"); CommonRegexMethod::ipV4Check("01"); CommonRegexMethod::ipV4Check("255."); CommonRegexMethod::ipV4Check("1."); CommonRegexMethod::ipV4Check("61."); CommonRegexMethod::ipV4Check("265."); CommonRegexMethod::ipV4Check("355."); CommonRegexMethod::ipV4Check("256."); CommonRegexMethod::ipV4Check("0."); CommonRegexMethod::ipV4Check("01."); CommonRegexMethod::ipV4Check("255.255"); CommonRegexMethod::ipV4Check("1.1"); CommonRegexMethod::ipV4Check("61.61"); CommonRegexMethod::ipV4Check("265.265"); CommonRegexMethod::ipV4Check("355.355"); CommonRegexMethod::ipV4Check("256.256"); CommonRegexMethod::ipV4Check("0.0"); CommonRegexMethod::ipV4Check("01.01"); CommonRegexMethod::ipV4Check("255.255.255.0"); CommonRegexMethod::ipV4Check("355.255.255.0"); CommonRegexMethod::ipV4Check("55.255.255.0"); CommonRegexMethod::ipV4Check("255.25.255.0"); CommonRegexMethod::ipV4Check("0.255.255.0"); CommonRegexMethod::ipV4Check(". . . 
."); CommonRegexMethod::ipV4Check("什么情况?"); CommonRegexMethod::ipV4Check("255.2.5.255.0"); CommonRegexMethod::ipV4Check("1.25.35.0"); CommonRegexMethod::ipV6Check("ABCD:CDCD:FFEE:3232:EDED:FFFF:4353:1234"); CommonRegexMethod::ipV6Check("ABCD:CDCD:FYEE:3232:EDED:FFFF:4353:1234"); CommonRegexMethod::ipV6Check("8888::8:800:7777:4444"); CommonRegexMethod::ipV6Check("::FFFF:129.24.6.7"); CommonRegexMethod::ipV6Check("FEDC::7654:3210::BA98:7654:3210"); CommonRegexMethod::ipV6Check("FEDC:BA98:7654:3210"); CommonRegexMethod::ipV6Check("::"); CommonRegexMethod::ipV6Check("什么情况?"); CommonRegexMethod::ipV6Check("255.2.5.255.0"); CommonRegexMethod::ipV6Check("1.25.35.0"); CommonRegexMethod::creditCardCheck("4534343421211212"); CommonRegexMethod::creditCardCheck("5534343421211212"); CommonRegexMethod::creditCardCheck("8534343421211212"); CommonRegexMethod::creditCardCheck("6234343421211212"); CommonRegexMethod::creditCardCheck("343434342121112"); CommonRegexMethod::creditCardCheck("453434342121112"); CommonRegexMethod::creditCardCheck("55343434212311212"); CommonRegexMethod::creditCardCheck("853434342211212"); CommonRegexMethod::creditCardCheck("623433421211212"); CommonRegexMethod::creditCardCheck("3434343421212112"); }