import os
from zipfile import ZipFile
from pptx import Presentation
from docx import Document
class PPT:
    """Helpers for pulling embedded media and text out of a .pptx file."""

    def PPT_get_pictrue(self, infile):
        """Extract every ``ppt/media/`` member of *infile* to disk.

        The output directory is ``./resluts/<name>``, where ``<name>`` is the
        file name of *infile* without its extension; it is created when
        missing. Members keep their archive paths, so the files end up under
        ``<output directory>/ppt/media/``.

        Args:
            infile: Path to the .pptx (zip) archive.

        Returns:
            The output directory path.
        """
        # Take the name from the basename so that any directory depth and any
        # extension length work, instead of indexing a fixed '/'-split
        # position and slicing off a fixed five characters.
        deck_name = os.path.splitext(os.path.basename(infile))[0]
        new_filepath = os.path.join('./resluts', deck_name)
        os.makedirs(new_filepath, exist_ok=True)
        with ZipFile(infile) as archive:
            for member in archive.namelist():
                if member.startswith("ppt/media/"):
                    archive.extract(member, path=new_filepath)
        return new_filepath

    def PPT_get_words_to_txt(self, inpath, outpath):
        """Write the text of every run in a presentation to a text file.

        The file is ``<outpath>/resluts.txt``, UTF-8 encoded, with one line
        per text run. Shapes without a text frame are skipped.

        Args:
            inpath: Path to the .pptx file to read.
            outpath: Directory that receives ``resluts.txt``; created when
                missing.
        """
        m_ppt = Presentation(inpath)
        # An empty outpath means "current directory"; makedirs('') would raise.
        if outpath:
            os.makedirs(outpath, exist_ok=True)
        txt_path = os.path.join(outpath, 'resluts.txt')
        with open(txt_path, 'w', encoding='utf-8') as f:
            for slide in m_ppt.slides:
                for shape in slide.shapes:
                    if not shape.has_text_frame:
                        continue
                    for paragraph in shape.text_frame.paragraphs:
                        f.writelines(run.text + '\n' for run in paragraph.runs)

    def PPT_get_words_to_docx(self, filepath, save_path):
        """Copy the paragraph text of a presentation into a new Word document.

        Each paragraph of each text-bearing shape becomes one paragraph in
        the document, which is saved once after all slides are processed.

        Args:
            filepath: Path to the .pptx file to read.
            save_path: Destination path of the .docx file.
        """
        wordfile = Document()
        deck = Presentation(filepath)
        for slide in deck.slides:
            for shape in slide.shapes:
                if not shape.has_text_frame:
                    continue
                for paragraph in shape.text_frame.paragraphs:
                    wordfile.add_paragraph(paragraph.text)
        wordfile.save(save_path)
if __name__ == "__main__":
    # Demo run: extract the embedded media of the sample deck, then write the
    # deck's text into the directory the extraction step returned.
    source_deck = "./test_data/OpenCV算法解析.pptx"
    extractor = PPT()
    output_dir = extractor.PPT_get_pictrue(source_deck)
    extractor.PPT_get_words_to_txt(source_deck, output_dir)
# The results are as follows: