本文实例讲述了Python实现批量将Word文档转换为HTML并将HTML内容发布至网站的方法。分享给大家供大家参考。具体实现方法如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
#coding=utf-8
__author__ = 'zhm'
from win32com import client as wc
import os
import time
import random
import MySQLdb
import re
# Save-format code passed to Document.SaveAs.  8 is wdFormatHTML in the Word
# object model's WdSaveFormat enumeration.
# NOTE(review): assumes the WPS automation interface uses the same code.
_WD_FORMAT_HTML = 8


def _gbk_to_text(value):
    """Return *value* as text, decoding GBK byte strings and passing text through.

    Works on both Python 2 (where os.walk may yield byte strings) and
    Python 3 (where the names are already text).
    """
    if isinstance(value, bytes):
        return value.decode("gbk")
    return value


def wordsToHtml(dir):
    """Convert every Word document found under *dir* to an HTML file.

    The directory tree is walked recursively.  Each ``.doc`` / ``.docx``
    file (suffix compared case-insensitively) is opened through the WPS
    COM automation object and saved next to the original as
    ``<name>.html``.  Files with any other suffix are skipped without
    being opened.

    Args:
        dir: Root directory to scan for Word documents.
    """
    # Kingsoft WPS automation: per the original author, the preview edition
    # is addressed as KWPS and the release edition as WPS.
    word = wc.Dispatch('KWPS.Application')
    try:
        for path, _subdirs, files in os.walk(dir):
            text_path = _gbk_to_text(path)
            for wordFile in files:
                word_name = _gbk_to_text(wordFile)
                dot_index = word_name.rfind(".")
                if dot_index == -1:
                    print('********************ERROR: 未取得后缀名!')
                    continue
                # Check the suffix BEFORE opening, so that images, already
                # generated HTML files, etc. are never handed to WPS.
                if word_name[dot_index + 1:].lower() not in ("doc", "docx"):
                    continue

                word_full_name = os.path.join(text_path, word_name)
                html_full_name = os.path.join(
                    text_path, word_name[:dot_index] + ".html")

                doc = word.Documents.Open(word_full_name)
                try:
                    doc.SaveAs(html_full_name, _WD_FORMAT_HTML)
                finally:
                    # Always release the document, even when saving fails.
                    doc.Close()
                print(u'生成了html文件:' + html_full_name)
    finally:
        # Never leave the automation process running in the background.
        word.Quit()
    print("")
    print("Finished!")
# Matches <img ...> tags and captures the value of the src attribute.
# It is a bytes pattern because the HTML is processed as raw bytes and
# written back without re-encoding.
_IMG_SRC_RE = re.compile(
    br"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)

# Encoding used for file names and for the image paths embedded in the
# HTML (the original script decoded both with GBK).
_NAME_ENCODING = "gbk"

# Wrapper stored in the database: an auto-resizing iframe that points at the
# published HTML file.  The URL is inserted between the two halves.
_IFRAME_HEAD = '''
<script type="text/javascript" language="javascript">
function iFrameHeight() {
var ifm= document.getElementById("iframepage");
var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
if(ifm != null && subWeb != null) {
ifm.height = subWeb.body.scrollHeight;
}
}
</script>
<iframe src="'''
_IFRAME_TAIL = '''"
marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height="100%" id="iframepage" name="iframepage" onLoad="iFrameHeight()"></iframe>
'''


def _as_text(value):
    """Return *value* as text, decoding byte strings with _NAME_ENCODING."""
    if isinstance(value, bytes):
        return value.decode(_NAME_ENCODING)
    return value


def _as_bytes(value):
    """Return *value* as bytes, encoding text with _NAME_ENCODING."""
    if isinstance(value, bytes):
        return value
    return value.encode(_NAME_ENCODING)


def _ensure_dir(directory):
    """Create *directory* (and missing parents) if it does not exist yet."""
    if not os.path.isdir(directory):
        os.makedirs(directory)


def _unique_path(directory, suffix):
    """Return a path in *directory* that does not exist yet.

    The file name is ``<millisecond timestamp><random int><suffix>``; a new
    name is drawn until no file of that name exists, so an already published
    file is never overwritten.
    """
    while True:
        stamp = int(time.time() * 1000)
        name = str(stamp) + str(random.randint(100, 10000)) + suffix
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            return candidate


def _web_path(file_path):
    """Turn a file path into the URL path used in the generated HTML.

    The drive prefix (everything up to the first ':') is dropped and
    backslashes become forward slashes, e.g. 'D:/files/htmls/1.html' ->
    '/files/htmls/1.html'.
    """
    return file_path.split(":", 1)[-1].replace("\\", "/")


def _publish_image(src, source_dir, img_target_dir):
    """Copy one referenced image to *img_target_dir*.

    Returns the new src value as bytes, or None when the image cannot be
    resolved to a local file (for example a remote URL), in which case the
    caller leaves the tag untouched.
    """
    try:
        relative = _as_text(src)
    except UnicodeDecodeError:
        return None
    local_file = os.path.join(source_dir, relative)
    if not os.path.isfile(local_file):
        print(u"WARNING: image not found, src left unchanged: " + local_file)
        return None

    _ensure_dir(img_target_dir)
    # Keep the real extension; fall back to '.png' when there is none.
    suffix = os.path.splitext(relative)[1] or '.png'
    target = _unique_path(img_target_dir, suffix)
    with open(local_file, 'rb') as reader:
        data = reader.read()
    with open(target, 'wb') as writer:
        writer.write(data)
    return _as_bytes(_web_path(target))


def _publish_images(content, source_dir, img_target_dir):
    """Copy every local image referenced in *content* and rewrite its src.

    Only the captured src value inside each <img> tag is replaced, so equal
    text elsewhere in the document is not affected.  An image referenced
    several times is copied once.
    """
    published = {}  # original src (bytes) -> new src (bytes) or None

    def _swap(match):
        src = match.group(1)
        if src not in published:
            published[src] = _publish_image(src, source_dir, img_target_dir)
        new_src = published[src]
        tag = match.group(0)
        if new_src is None:
            return tag
        begin = match.start(1) - match.start(0)
        finish = match.end(1) - match.start(0)
        return tag[:begin] + new_src + tag[finish:]

    return _IMG_SRC_RE.sub(_swap, content)


def html_add_to_db(dir, html_target_dir='D:/files/htmls/',
                   img_target_dir='D:/files/images/whzx/', db_config=None):
    """Publish the HTML files under *dir* and register them in the database.

    For every ``.htm`` / ``.html`` file (suffix compared case-insensitively)
    found while walking *dir*:

    1. locally referenced images are copied to *img_target_dir* and their
       ``src`` attributes rewritten to the published location;
    2. the rewritten HTML is written to *html_target_dir* under a unique
       generated name;
    3. a row is inserted into ``common_article`` whose title is the file
       name without its suffix and whose content is an iframe wrapper that
       points at the published file.

    A failed insert is reported and skipped; the remaining files are still
    processed.  The transaction is committed once, after the whole walk.

    Args:
        dir: Root directory containing the converted HTML files.
        html_target_dir: Directory the HTML files are published to.  Per the
            original author, D:/files is the static directory configured on
            the web server.
        img_target_dir: Directory the referenced images are published to.
        db_config: Optional dict of MySQLdb.connect() keyword arguments that
            override the built-in defaults.
    """
    # NOTE(review): the default credentials are hard-coded development
    # values; pass db_config for anything other than a local test database.
    settings = dict(host='localhost', port=3306, user='root',
                    passwd='root', db='test', charset='utf8')
    if db_config:
        settings.update(db_config)

    sql = "insert into common_article(title,content) values(%s,%s)"
    conn = MySQLdb.connect(**settings)
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(dir):
                source_dir = _as_text(path)
                for html_file in files:
                    html_name = _as_text(html_file)
                    dot_index = html_name.rfind(".")
                    if dot_index == -1:
                        print('********************ERROR: 未取得后缀名!')
                        continue
                    if html_name[dot_index + 1:].lower() not in ("htm", "html"):
                        continue

                    title = os.path.splitext(html_name)[0]
                    html_full_name = os.path.join(source_dir, html_name)

                    # Read the page as raw bytes.
                    with open(html_full_name, 'rb') as ht_file:
                        content = ht_file.read()

                    # Upload the images and point the tags at the copies.
                    content = _publish_images(
                        content, source_dir, img_target_dir)

                    _ensure_dir(html_target_dir)
                    target_file = _unique_path(html_target_dir, '.html')
                    with open(target_file, 'wb') as out_file:
                        out_file.write(content)

                    # Wrap the published page in an iframe for the article.
                    iframe_html = (
                        _IFRAME_HEAD + _web_path(target_file) + _IFRAME_TAIL)
                    try:
                        cur.execute(sql, (title, iframe_html))
                    except MySQLdb.Error as exc:
                        print("Error: unable to insert data")
                        print(exc)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection even when processing fails.
        conn.close()
if __name__ == '__main__':
    # Step 1: convert the Word documents under d:/word to HTML files.
    wordsToHtml('d:/word')
    # Step 2: publish the generated HTML files and insert them into the database.
    html_add_to_db('d:/word')
|
希望本文所述对大家的Python程序设计有所帮助。