re.S(即 re.DOTALL):使 '.' 匹配包括换行符在内的所有字符(默认情况下 '.' 不匹配换行符)
>>> pattern=r'ghostwu.com'
>>> import re
>>> re.findall( pattern, 'ghostwuacom' )
['ghostwuacom']
>>> re.findall( pattern, 'ghostwubcom' )
['ghostwubcom']
>>> re.findall( pattern, 'ghostwu.com' )
['ghostwu.com']
>>> re.findall( pattern, 'ghostwu\ncom' )
[]
>>> re.findall( pattern, 'ghostwu\ncom', re.S )
['ghostwu\ncom']
>>>
re.M(即 re.MULTILINE):多行匹配,主要影响 ^ 和 $,使它们分别匹配每一行的行首和行尾,而不只是整个字符串的开头和结尾
>>> str="""
... hi,ghostwu,how are you
... ghostwu: my name is ghostwu,how are you
... ghostwu: nice to meet you
... hello ghostwu
... """
>>> pattern = r"^ghostwu"
>>> re.findall( pattern, str )
[]
>>> re.findall( pattern, str, re.M )
['ghostwu', 'ghostwu']
>>>
当正则表达式写成多行时,可以开启 verbose 模式 re.X(即 re.VERBOSE),此时模式中未转义的空白和换行会被忽略
>>> pattern=r"""
... \d{3,4}
... -?
... \d{8}
... """
>>> str="020-88888888"
>>> re.findall( pattern, str )
[]
>>> re.findall( pattern, str, re.X )
['020-88888888']
>>>
():分组与 | 的使用。假如我们要匹配一个以 .com、.cn 或 .net 结尾的 email(注意:下面模式中的 '.' 未转义,会匹配任意字符;且 re.match 只从开头匹配、不要求匹配到字符串结尾。更严格的写法是 r"\w+@\w+\.(com|cn|net)$")
>>> pattern=r"\w+@\w+(.com|.cn|.net)"
>>> email="abc@qq.com"
>>> re.match( pattern, email )
<_sre.SRE_Match object at 0x7f2b74481828>
>>> re.match( pattern, 'abc@qq.cn' )
<_sre.SRE_Match object at 0x7f2b744818a0>
>>> re.match( pattern, 'abc@qq.net' )
<_sre.SRE_Match object at 0x7f2b74481828>
>>> re.match( pattern, 'abc@qq.io' )
>>>
匹配超链接:用非贪婪分组 (.+?) 提取每个 href 属性的值(注意:示例中先把 re 赋值成了字符串,遮蔽了 re 模块,所以之后需要重新 import re)
>>> html="""
... <a href="http://www.baidu.com">百度</a>
... <a href="index.html">首页</a>
... <p>这是一段说明</p>
... <a href="http://www.taobao.com">淘宝</a>
... """
>>> re=r"href=\"(.+?)\""
>>> pattern=r"href=\"(.+?)\""
>>> re
'href=\\"(.+?)\\"'
>>> import re
>>> re.findall( pattern, html )
['http://www.baidu.com', 'index.html', 'http://www.taobao.com']
>>>