python合并PDF文件

时间:2022-09-10 06:37:13

今天群里有个人提了这么一个需求:不同文件夹下的同名PDF能不能合并。

这个其实很简单,我写了代码实现了。思路是先把同名文件收集起来,形成一个同名文件的列表(全路径不一样,文件名一样),然后再把这个列表中的文件合并成一个PDF文件,写到一个总的目录中。好了,直接上代码吧。



#coding=utf-8
import os
import os.path
from pyPdf import PdfFileReader,PdfFileWriter

class MergePDF(object):
    """Concatenate several PDF files into a single output PDF.

    Attributes are filled in by the caller after construction:

    pdf_name -- file name (not a path) of the merged PDF to write.
    pdf_list -- full paths of the source PDFs, in the order their pages
                should appear in the result.
    out_dir  -- directory the merged file is written into; defaults to
                DEFAULT_OUT_DIR.
    """

    # Output location that used to be hard-coded inside Merge(); kept as
    # the default so existing callers write to the same place.
    DEFAULT_OUT_DIR = "C:/test_merge/Merge/"

    def __init__(self):
        self.pdf_name = ""
        self.pdf_list = []
        self.out_dir = self.DEFAULT_OUT_DIR

    def Merge(self):
        """Append every page of every file in pdf_list and write the result.

        The merged document is written to out_dir/pdf_name. The output
        directory is created when it does not exist yet. Errors raised
        while opening, parsing or writing a file propagate to the caller;
        all source streams are closed either way.
        """
        out = PdfFileWriter()
        opened = []
        try:
            for src_pdf in self.pdf_list:
                stream = open(src_pdf, 'rb')
                opened.append(stream)
                for page in PdfFileReader(stream).pages:
                    out.addPage(page)

            if self.out_dir and not os.path.isdir(self.out_dir):
                os.makedirs(self.out_dir)

            with open(os.path.join(self.out_dir, self.pdf_name), 'wb') as ous:
                out.write(ous)
        finally:
            # Source streams are closed only after write(): the page objects
            # handed to the writer may still read from them at that point.
            for stream in opened:
                stream.close()

class Opt(object):
    """Collect same-named PDF files from several folders and merge them.

    Typical use: call add_srcfolder() once per folder, then deal() to scan
    them, then doMerge() to run one merge job per distinct file name.

    Attributes:

    srcfolderlist -- folders to scan, in the order they were added.
    dstfolder     -- optional destination folder; empty string means
                     "not set".
    OptList       -- dict mapping a file name to the MergePDF job that
                     collects every full path found with that name.
    """

    def __init__(self):
        self.srcfolderlist = []
        self.dstfolder = ""

        self.OptList = {}

    def add_srcfolder(self, srcfolder):
        """Register one more folder to be scanned by deal()."""
        self.srcfolderlist.append(srcfolder)

    def add_dstfolder(self, dstfolder):
        """Remember the destination folder that doMerge() hands to each job."""
        self.dstfolder = dstfolder

    def deal(self):
        """Walk every source folder recursively and group PDF files by name.

        Only files whose name ends in ".pdf" (case-insensitive) are
        collected, so stray non-PDF files are never passed on to the PDF
        reader.
        """
        for folder in self.srcfolderlist:
            for parent, _dirnames, filenames in os.walk(folder):
                for filename in filenames:
                    if not filename.lower().endswith(".pdf"):
                        continue
                    self.createOpt(filename, parent + "/" + filename)

    def createOpt(self, filename, fullname):
        """Add fullname to the job for filename, creating the job if needed."""
        m = self.OptList.get(filename)
        if m is None:
            m = MergePDF()
            m.pdf_name = filename
            self.OptList[filename] = m
        m.pdf_list.append(fullname)

    def doMerge(self):
        """Run every collected merge job."""
        for obj in self.OptList.values():
            # Forward the configured destination, if any, onto the job as
            # `out_dir` before merging; with no destination set the job
            # keeps whatever output location it already uses.
            if self.dstfolder:
                obj.out_dir = self.dstfolder
            obj.Merge()

if __name__ == "__main__":
    # Example run: scan six sibling folders, group the PDFs they contain
    # by file name, then merge each group.
    source_folders = (
        "C:/test_merge/1",
        "C:/test_merge/2",
        "C:/test_merge/3",
        "C:/test_merge/4",
        "C:/test_merge/5",
        "C:/test_merge/6",
    )

    optObj = Opt()
    for source_folder in source_folders:
        optObj.add_srcfolder(source_folder)

    optObj.deal()
    optObj.doMerge()

备注:C:\test_merge 这个目录里面有6个子目录(1~6),每个子目录下都有PDF文件。


合并后的文件会输出到 C:\test_merge\Merge 目录中。

用到了pyPdf这个读写PDF的库。