# Fetch the sample code published on the web.py site (webpy.org).

import requests

import re

import os.path

# Build the dictionary that maps each file name to its content.

def getCode(url, timeout=10):
    """Download ``url`` and map each sample's heading to its HTML snippet.

    The page text is scanned for a numbered heading tag (``<hN>``) that is
    followed, optionally after whitespace, by a ``<pre><code>...</code>``
    block containing no nested tags. The heading text is used as the name
    of the sample.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A dict mapping the whitespace-stripped heading text to the complete
        matched HTML (from the opening heading tag through ``</code>``).
        The dict is empty when the response status is not 200 or when the
        page holds no matching snippet. If two headings share the same
        text, the later snippet replaces the earlier one.

    Raises:
        requests.exceptions.RequestException: propagated unchanged when the
            download itself fails (connection error, timeout, ...).
    """
    # The heading group is mandatory so a match can never yield a None key;
    # "\s*" (rather than "\n*") also accepts pages served with CRLF line ends.
    pattern = re.compile(r'<h\d>([^<]+)</h\d>\s*<pre><code>[^<]*</code>')

    samples = {}
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return samples

    for match in pattern.finditer(response.text):
        name = match.group(1).strip()
        if not name:
            # A heading made only of whitespace cannot serve as a file name.
            continue
        samples[name] = match.group(0)
    return samples

# Write the collected snippets out to files.

def _unescape_html(text):
    """Return ``text`` with HTML entities such as ``&lt;`` decoded."""
    try:
        from html import unescape  # Python 3.4+
    except ImportError:
        # Python 2 has no html.unescape; HTMLParser offers the same decoding.
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(text)


def _extract_code(snippet):
    """Return the text between ``<code>`` and ``</code>``, or None if absent."""
    open_tag = "<code>"
    begin = snippet.find(open_tag)
    if begin == -1:
        return None
    begin += len(open_tag)
    # Search for the closing tag only after the opening one, so a stray
    # "</code>" earlier in the snippet cannot produce an empty slice.
    end = snippet.find("</code>", begin)
    if end == -1:
        return None
    return snippet[begin:end]


def saveFile(saveDir,dic):
    """Write every code snippet in ``dic`` to its own file below ``saveDir``.

    Args:
        saveDir: Root output directory; created when it does not exist.
        dic: Mapping of file name to an HTML fragment that contains a
            ``<code>...</code>`` element. A name may contain "/" to place
            the file in sub-directories, which are created on demand.

    For each entry the text inside the ``<code>`` element is extracted, its
    HTML entities are decoded (the page escapes characters such as "<" and
    "&" in the source) and the result is written as UTF-8 text.

    Entries whose name has no usable component, or whose fragment lacks a
    complete ``<code>`` element, are reported on standard output and
    skipped; the remaining entries are still written.
    """
    import io  # io.open accepts an explicit encoding on Python 2 and 3 alike

    if not os.path.isdir(saveDir):
        os.makedirs(saveDir)

    for key in dic:
        print("%s %s" % (key, dic[key]))

        # Names come from page headings: drop empty, "." and ".." components
        # so a heading can never address a file outside of saveDir.
        parts = [p for p in (key or "").split("/") if p not in ("", ".", "..")]
        if not parts:
            print("skip invalid file name: %r" % (key,))
            continue

        # Validate the fragment before touching the disk, so a bad entry
        # leaves neither an empty file nor an open handle behind.
        code = _extract_code(dic[key])
        if code is None:
            print("<code> have not")
            continue

        if isinstance(code, bytes):
            # Python 2 byte strings must become text before io.open writes.
            code = code.decode("utf-8")
        code = _unescape_html(code)

        outPath = os.path.join(saveDir, *parts[:-1])
        if not os.path.isdir(outPath):
            os.makedirs(outPath)

        with io.open(os.path.join(outPath, parts[-1]), "w",
                     encoding="utf-8") as outFile:
            outFile.write(code)

# Pages on webpy.org whose embedded sample code is downloaded.
url1 = 'http://webpy.org/skeleton/0.3'
url2 = 'http://webpy.org/src/blog/0.3'
url3 = 'http://webpy.org/src/todo-list/0.3'
url4 = 'http://webpy.org/src/wiki/0.3'
urls = (url1, url2, url3, url4)

for url in urls:
    # Build the dictionary that maps each file name to its content.
    dic = getCode(url)

    # Keep the relative layout of the URL: the scheme becomes a leading
    # backslash and every "/" becomes a backslash.
    withoutScheme = url.replace("http://", "\\")
    proPath = withoutScheme.replace("/", "\\")

    # The save root is fixed under f:\pyworkspace.
    saveFile(r'f:\pyworkspace' + proPath, dic)
# 秒客网

# Fetch the sample code published on the web.py site

# Related articles