要求:检查指定目录里的html文件的标题和文件名是否对应(非相同)
1
import os
import re
import string
import sys
2
def getHtmlContent(filename):
    """Return the entire text content of the file at *filename*.

    Args:
        filename: path of the file to read.

    Returns:
        The file's content as a single string.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # Open *before* entering the try block: if open() itself fails there is
    # no file object to close, and the caller should see the real IOError
    # rather than an unbound-name error raised from the cleanup clause.
    fp = open(filename)
    try:
        return fp.read()
    finally:
        # Always release the handle, even if read() raises.
        fp.close()
11
def getHtmlTitle(filename):
    """Return the lower-cased text of the first <title> element in a file.

    Args:
        filename: path of the HTML file to inspect.

    Returns:
        The text between the first ``<title>`` and the next ``</title>``,
        lower-cased, or an empty string when the file has no such element.

    Raises:
        IOError: if the file cannot be read (propagated from getHtmlContent).
    """
    # Lower-casing the whole document lets the pattern match <TITLE>,
    # <Title>, etc.  The str method is used because the string-module
    # function of the same name is not available on newer interpreters.
    content = getHtmlContent(filename).lower()
    # Non-greedy so the capture stops at the first closing tag; DOTALL so a
    # title that spans several lines is still found.
    m = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
    if m is None:
        # No title element: report "no title" instead of failing on None.
        return ''
    return m.group(1)
16
def main(path="d:\\"):
    """Print the name and title of every .htm/.html file directly in *path*.

    A header line is printed first, followed by one line per matching file
    containing the file name and the title reported by getHtmlTitle.
    Sub-directories are not descended into.

    Args:
        path: directory to scan.  Defaults to the root of drive D:, the
            location this script was originally hard-wired to.
    """
    # Header text is kept exactly as in the original listing.
    print(" filename , title ")
    for filename in os.listdir(path):
        # os.listdir returns bare names, so build the full path before
        # asking the file system about the entry.
        full_path = os.path.join(path, filename)
        # Compare extensions case-insensitively (.HTM, .Html, ...).
        extension = os.path.splitext(filename)[1].lower()
        # Both extensions are subject to the isfile check; a directory
        # that happens to be named "x.html" is skipped.
        if os.path.isfile(full_path) and extension in (".htm", ".html"):
            # Single-argument print(...) behaves the same as a statement
            # and as a function call.
            print("%s %s" % (filename, getHtmlTitle(full_path)))
25
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
2
def getHtmlContent(filename):
    """Return the entire text content of the file at *filename*.

    Args:
        filename: path of the file to read.

    Returns:
        The file's content as a single string.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # Open *before* entering the try block: if open() itself fails there is
    # no file object to close, and the caller should see the real IOError
    # rather than an unbound-name error raised from the cleanup clause.
    fp = open(filename)
    try:
        return fp.read()
    finally:
        # Always release the handle, even if read() raises.
        fp.close()
11
def getHtmlTitle(filename):
    """Return the lower-cased text of the first <title> element in a file.

    Args:
        filename: path of the HTML file to inspect.

    Returns:
        The text between the first ``<title>`` and the next ``</title>``,
        lower-cased, or an empty string when the file has no such element.

    Raises:
        IOError: if the file cannot be read (propagated from getHtmlContent).
    """
    # Lower-casing the whole document lets the pattern match <TITLE>,
    # <Title>, etc.  The str method is used because the string-module
    # function of the same name is not available on newer interpreters.
    content = getHtmlContent(filename).lower()
    # Non-greedy so the capture stops at the first closing tag; DOTALL so a
    # title that spans several lines is still found.
    m = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
    if m is None:
        # No title element: report "no title" instead of failing on None.
        return ''
    return m.group(1)
16
def main(path="d:\\"):
    """Print the name and title of every .htm/.html file directly in *path*.

    A header line is printed first, followed by one line per matching file
    containing the file name and the title reported by getHtmlTitle.
    Sub-directories are not descended into.

    Args:
        path: directory to scan.  Defaults to the root of drive D:, the
            location this script was originally hard-wired to.
    """
    # Header text is kept exactly as in the original listing.
    print(" filename , title ")
    for filename in os.listdir(path):
        # os.listdir returns bare names, so build the full path before
        # asking the file system about the entry.
        full_path = os.path.join(path, filename)
        # Compare extensions case-insensitively (.HTM, .Html, ...).
        extension = os.path.splitext(filename)[1].lower()
        # Both extensions are subject to the isfile check; a directory
        # that happens to be named "x.html" is skipped.
        if os.path.isfile(full_path) and extension in (".htm", ".html"):
            # Single-argument print(...) behaves the same as a statement
            # and as a function call.
            print("%s %s" % (filename, getHtmlTitle(full_path)))
25
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
运行环境:Python2.2