1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
. 匹配除换行符以外的任意字符
^ 匹配字符串的开始
$ 匹配字符串的结束
[] 用来匹配一个指定的字符类别
? 对于前一个字符重复0次到1次
* 对于前一个字符重复0次到无穷次
{m} 对于前一个字符重复m次
{m,n} 对前一个字符重复为m到n次
\d 匹配数字,相当于[0-9]
\D 匹配任何非数字字符,相当于[^0-9]
\s 匹配任意的空白符,相当于[ \t\n\r\f\v]
\S 匹配任何非空白字符,相当于[^ \t\n\r\f\v]
\w 匹配任何字母数字字符,相当于[a-zA-Z0-9_]
\W 匹配任何非字母数字字符,相当于[^a-zA-Z0-9_]
\b 匹配单词的开始或结束
|
1
|
>>>
import
re
|
1
2
3
|
>>>
dir
(re)
[
'DEBUG'
,
'DOTALL'
,
'I'
,
'IGNORECASE'
,
'L'
,
'LOCALE'
,
'M'
,
'MULTILINE'
,
'S'
,
'Scanner'
,
'T'
,
'TEMPLATE'
,
'U'
,
'UNICODE'
,
'VERBOSE'
,
'X'
,
'_MAXCACHE'
,
'__all__'
,
'__builtins__'
,
'__doc__'
,
'__file__'
,
'__name__'
,
'__package__'
,
'__version__'
,
'_alphanum'
,
'_cache'
,
'_cache_repl'
,
'_compile'
,
'_compile_repl'
,
'_expand'
,
'_pattern_type'
,
'_pickle'
,
'_subx'
,
'compile'
,
'copy_reg'
,
'error'
,
'escape'
,
'findall'
,
'finditer'
,
'match'
,
'purge'
,
'search'
,
'split'
,
'sre_compile'
,
'sre_parse'
,
'sub'
,
'subn'
,
'sys'
,
'template'
]
>>>
|
1
2
|
>>>
help
(re.search)
search(pattern, string, flags
=
0
)
|
1
2
3
4
5
6
7
8
|
>>> name
=
"Hello,My name is kuangl,nice to meet you..."
>>> k
=
re.search(r
'k(uan)gl'
,name)
>>>
if
k:
...
print
k.group(
0
),k.group(
1
)
...
else
:
...
print
"Sorry,not search!"
...
kuangl uan
|
1
2
|
>>>
help
(re.match)
match(pattern, string, flags
=
0
)
|
1
2
3
4
5
6
7
8
9
10
|
>>> name
=
"Hello,My name is kuangl,nice to meet you..."
>>> k
=
re.match(r
"(\H....)"
,name)
>>>
if
k:
...
print
k.group(
0
),
'\n'
,k.group(
1
)
...
else
:
...
print
"Sorry,not match!"
...
Hello
Hello
>>>
|
1
2
|
>>>
help
(re.findall)
findall(pattern, string, flags
=
0
)
|
1
2
3
|
>>> mail
=
'<user01@mail.com> <user02@mail.com> user04@mail.com'
#第3个故意没有尖括号
>>> re.findall(r
'(\w+@m....[a-z]{3})'
,mail)
[
'user01@mail.com'
,
'user02@mail.com'
,
'user04@mail.com'
]
|
1
2
|
>>>
help
(re.sub)
sub(pattern, repl, string, count
=
0
)
|
1
2
3
4
5
6
|
>>> test
=
"Hi, nice to meet you where are you from?"
>>> re.sub(r
'\s'
,
'-'
,test)
'Hi,-nice-to-meet-you-where-are-you-from?'
>>> re.sub(r
'\s'
,
'-'
,test,
5
)
#替换至第5个
'Hi,-nice-to-meet-you-where are you from?'
>>>
|
1
2
|
>>>
help
(re.split)
split(pattern, string, maxsplit
=
0
)
|
1
2
3
4
5
6
|
>>> test
=
"Hi, nice to meet you where are you from?"
>>> re.split(r
"\s+"
,test)
[
'Hi,'
,
'nice'
,
'to'
,
'meet'
,
'you'
,
'where'
,
'are'
,
'you'
,
'from?'
]
>>> re.split(r
"\s+"
,test,
3
)
#分割前三个
[
'Hi,'
,
'nice'
,
'to'
,
'meet you where are you from?'
]
>>>
|
1
2
|
>>> help(re.compile)
compile(pattern, flags=
0
)
|
1
2
3
4
5
6
7
8
9
|
>>> test
=
"Hi, nice to meet you where are you from?"
>>> k
=
re.
compile
(r
'\w*o\w*'
)
#匹配带o的字符串
>>>
dir
(k)
[
'__copy__'
,
'__deepcopy__'
,
'findall'
,
'finditer'
,
'match'
,
'scanner'
,
'search'
,
'split'
,
'sub'
,
'subn'
]
>>>
print
k.findall(test)
#显示所有包含o的字符串
[
'to'
,
'you'
,
'you'
,
'from'
]
>>>
print
k.sub(
lambda
m:
'['
+
m.group(
0
)
+
']'
,test)
# 将字符串中含有o的单词用[]括起来
Hi, nice [to] meet [you] where are [you] [
from
]?
>>>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
#!/usr/bin/env python
import
urllib2
import
re
import
os
URL
=
'http://image.baidu.com/channel/wallpaper'
read
=
urllib2.urlopen(URL).read()
pat
=
re.
compile
(r
'src=\'#\'" //.+?.js">'
)
urls
=
re.findall(pat,read)
for
i
in
urls:
url
=
i.replace(
'src=\'#\'" /code>
|