正则表达式匹配(python)

时间:2021-10-21 20:12:28

获取图片的python代码

#coding=utf-8
import urllib
import re def getHtml(url):
page = urllib.urlopen(url)
html = page.read()
return html def getImg(html):
reg = r'src="(.+?\.jpg).+"'
imgre = re.compile(reg)
imglist = re.findall(imgre,html)
x = 0
for imgurl in imglist:
urllib.urlretrieve(imgurl,'%s.jpg' % x)
x+=1 html = getHtml("http://www.csdn.net/article/2015-01-15/2823564") print getImg(html)

findall和group的用法

import re
reg = r'www\.(.*)\..{3}'
imgre = re.compile(reg)
imglist = re.findall(imgre,'www.python.org')
#for imgurl in imglist:
print imglist

import re
reg = r'(.+):"(.+a)"'
imgre = re.compile(reg)
imglist = re.findall(imgre,'name:"wangjian"ok')
#for imgurl in imglist:
print imglist

import re
reg = r'src="(.+?\.jpg)"'
imgre = re.compile(reg)
html='<img src="http://cms.csdnimg.cn/article/201501/15/54b70da54b668_middle.jpg?_=48735" style="float: none; margin: 0px;" alt="">'
imglist = re.findall(imgre,html)
print imglist

参考https://docs.python.org/2/library/re.html#re.findall

python的search和match的区别

精通正则表达式第三版

解释了在正则表达式中\b元字符的使用的参考文档如下:

http://www.cnblogs.com/85538649/archive/2011/07/26/wtq0705.html

http://www.cnblogs.com/deerchao/archive/2006/08/24/zhengzhe30fengzhongjiaocheng.html