1、findall---从头到尾扫描整个字符串,把所有匹配成功的结果按顺序放到列表中返回
import re print(re.findall('alex','12a3 alex say hello alex sb 123 _ 4%5*6'))
#匹配过程:从左到右扫描,在'12a3'处 a 匹配成功,但后面的 3 与 l 不匹配,于是从 3 开始重新匹配;每匹配成功一个 alex,就从它后面的位置(空格)继续匹配,最终结果为 ['alex', 'alex']
2、\w---字母数字下划线
print(re.findall('\w','alex say alex sb 123 _ 4%5*')) ['a', 'l', 'e', 'x', 's', 'a', 'y', 'a', 'l', 'e', 'x', 's', 'b', '1', '2', '3', '_', '4', '5']
3、\W---非字母数字下划线
print(re.findall('\W','alex say alex sb 123 _ 4%5*')) [' ', ' ', ' ', ' ', ' ', ' ', '%', '*']
4、\s \S---(非)空白字符
print(re.findall('\s','al\te\nx hello alex sb 123 _ 4%5*')) print(re.findall('\S','al\te\nx hello alex sb 123 _ 4%5*')) ['\t', '\n', ' ', ' ', ' ', ' ', ' ', ' '] ['a', 'l', 'e', 'x', 'h', 'e', 'l', 'l', 'o', 'a', 'l', 'e', 'x', 's', 'b', '1', '2', '3', '_', '4', '%', '5', '*'] #\t、\n、空格等都属于空白字符
5、\d---数字
print(re.findall('\d','al\te\nx hello alex sb 123 _ 4%5*')) print(re.findall('\d\d','al\te\nx hel12345lo alex sb 123 _ 4%5*'))
#后者匹配连续两位数
#\D---匹配非数字
6、\A---只从开头找
print(re.findall('\Al','alex say hello')) []
7、\Z---只从结尾找
print(re.findall('llo\Z','alex say hello')) ['llo']
以上两个类似于 ^ 和 $(区别:$ 还能匹配字符串末尾换行符之前的位置;在 re.M 多行模式下,^ 和 $ 还能匹配每一行的开头和结尾,而 \A、\Z 始终只匹配整个字符串的开头和结尾):
print(re.findall('^l','alex say hello')) print(re.findall('llo$','alex say hello'))
8、\n \t
print(re.findall('\n','al\te\nx hello alex sb 123 _ 4%5*')) print(re.findall('\t','al\te\nx hello alex sb 123 _ 4%5*')) ['\n'] ['\t']
9、重复匹配:. [] ? * + {}
print(re.findall('a.c','a1c a%c abc accc acccc')) ['a1c', 'a%c', 'abc', 'acc', 'acc'] #注:. 默认匹配除换行符以外的任意一个字符,所以下面的 a\nc 匹配不到 print(re.findall('a.c','a1c a%c a\nc accc acccc')) ['a1c', 'a%c', 'acc', 'acc'] print(re.findall('a.c','a1c a%c a\nc accc acccc',re.S)) print(re.findall('a.c','a1c a%c a\nc accc acccc',re.DOTALL)) 加上 re.S(即 re.DOTALL)后 . 也可以匹配换行符,结果为 ['a1c', 'a%c', 'a\nc', 'acc', 'acc'],其他类似。
print(re.findall('ab?','a ab abb abbb abbbbbb')) ['a', 'ab', 'ab', 'ab', 'ab']
print(re.findall('ab*','a ab abb abbb abbbbbb abbc123bbbb')) ['a', 'ab', 'abb', 'abbb', 'abbbbbb', 'abb']
print(re.findall('ab+','a ab abb abbb abbbbbb abbc123bbbb')) ['ab', 'abb', 'abbb', 'abbbbbb', 'abb']
print(re.findall('ab{1,3}','a ab abb abbb abbbbbb abbc123bbbb')) print(re.findall('ab{3}','a ab abb abbb abbbbbb abbc123bbbb')) print(re.findall('ab{3,}','a ab abb abbb abbbbbb abbc123bbbb')) ['ab', 'abb', 'abbb', 'abbb', 'abb'] #1到3个 ['abbb', 'abbb'] #3个 ['abbb', 'abbbbbb'] #至少3个 print(re.findall('ab{0,1}','a ab abb abbb abbbbbb abbc123bbbb')) print(re.findall('ab{0,}','a ab abb abbb abbbbbb abbc123bbbb')) print(re.findall('ab{1,}','a ab abb abbb abbbbbb abbc123bbbb')) 取代: ? * +
10、.*匹配任意多个任意字符(默认不包括换行符)
贪婪匹配:在能匹配成功的前提下尽可能多地匹配,得到满足条件的最长字符串
print(re.findall('a.*b','a123b456b'))
['a123b456b']
11、非贪婪匹配:.*? 是固定写法,这里的 ? 跟在量词 * 后面,表示尽可能少地匹配,而不是“匹配0次或1次”的含义
print(re.findall('a.*?b','a123b456b')) ['a123b']
12、分组:()
#分组前: print(re.findall('<imag href=\".*\" />', '<h1>hello</h1><a href="http://www.baidu.com"></a><imag href="http://www.baidu.com/a.jpg" />')) #分组后:只留下组内的东西 print(re.findall('<imag href=\"(.*)\" />', '<h1>hello</h1><a href="http://www.baidu.com"></a><imag href="http://www.baidu.com/a.jpg" />')) ['<imag href="http://www.baidu.com/a.jpg" />'] ['http://www.baidu.com/a.jpg'] ?:同分组前 print(re.findall('<imag href=\"(?:.*)\" />', '<h1>hello</h1><a href="http://www.baidu.com"></a><imag href="http://www.baidu.com/a.jpg" />')) 结果: ['<imag href="http://www.baidu.com/a.jpg" />'] ['http://www.baidu.com/a.jpg'] ['<imag href="http://www.baidu.com/a.jpg" />']
13、或者:|
print(re.findall('compan(ies|y)','Too many companies have gone bankrupt, and the next one is my company')) print(re.findall('compan(?:ies|y)','Too many companies have gone bankrupt, and the next one is my company')) ['ies', 'y'] ['companies', 'company'] #注意后面那个,取所有
14、匹配a\c
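print(re.findall('a\\c','a\c a12 a2c')) # 'a\\c' 先由 Python 解释器处理字符串转义,变成 a\c 之后再交给 re 模块做正则匹配;此时 \c 不是合法的正则转义,匹配不到 a\c(Python 3.6 及以后会抛出 re.error: bad escape \c)。不使用原生字符串时应写成 'a\\\\c'。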
使用 raw string(原生字符串,即 r'...')时,Python 解释器不会对反斜杠做转义,字符串原样交给 re 模块,所以:
print(re.findall(r'a\\c','a\c a12 a2c')) #'a\\c' ['a\\c']
补充---[]:字符集,匹配方括号内列出的任意一个字符;[^...] 表示取反;- 表示范围,若要匹配 - 本身,需放在最左边、最右边或用 \- 转义
print(re.findall('a[0-9]c','a1c a%c a\nc accc acccc',re.S)) print(re.findall('a[a-z]c','a1c a%c a\nc accc acccc',re.S)) print(re.findall('a[A-Z]c','a1c a%c a\nc accc acccc aAc aAAc',re.S)) print(re.findall('a[0-9a-zA-Z]c','a1c a%c a\nc accc acccc aAc aAAc',re.S)) print(re.findall('a[% ]c','a c a1c a%c a+c a-c a/c a*c',re.S)) #%或空格取出来 print(re.findall('a[^% ]c','a c a1c a%c a+c a-c a/c a*c',re.S)) #上面取反 print(re.findall('a[+\-*/]c','a c a1c a%c a+c a-c a/c a*c',re.S)) print(re.findall('a[-+*/]c','a c a1c a%c a+c a-c a/c a*c',re.S)) #-号放在左边或者右边 print(re.findall('a[+*/-]c','a c a1c a%c a+c a-c a/c a*c',re.S)) print(re.findall('a.*?c','a c a1c a%c a+c a-c a/c a*c',re.S))