openoffice命令行方式将word、excel、ppt转成pdf

时间:2021-01-18 06:42:42

上一篇记录了安装libreoffice的方式将office文档转换成pdf,接下来将使用openoffice实现同样的功能。

首先到openoffice官网下载最新的rpm压缩包:

https://www.openoffice.org/download/index.html

目前我下载最的的是:Apache_OpenOffice_4.1.3_Linux_x86-64_install-rpm_zh-CN.tar.gz

解压并进入到相关目录安装rpm包:

[root@instance-32spzihn src]# tar -zxvf Apache_OpenOffice_4.1.3_Linux_x86-64_install-rpm_zh-CN.tar.gz
[root@instance-32spzihn src]# cd zh-CN/RPMS
[root@instance-32spzihn RPMS]# yum install *.rpm
[root@instance-32spzihn RPMS]# cd /opt/openoffice4/program

安装完成并进入到对应的program目录之后,启动openoffice:

[root@instance-32spzihn program]# ./soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard &

发现报错:

[root@instance-32spzihn program]# javaldx: Could not find a Java Runtime Environment!

不过openoffice服务已经启动,由于只需要用到转换接口命令,不影响功能的情况下,我就不深究了。


接下来就需要一份python的脚本程序,因为转换需要通过py程序去调用api处理,将下面程序保存到topdf.py文件:

#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
DEFAULT_OPENOFFICE_PORT = 8100

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

#---------------------#
# Configuration Start #
#---------------------#

# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter

# most formats are auto-detected; only those requiring options are defined here
IMPORT_FILTER_MAP = {
"txt": {
"FilterName": "Text (encoded)",
"FilterOptions": "utf8"
},
"csv": {
"FilterName": "Text - txt - csv (StarCalc)",
"FilterOptions": "44,34,0"
}
}

EXPORT_FILTER_MAP = {
"pdf": {
FAMILY_TEXT: { "FilterName": "writer_pdf_Export" },
FAMILY_WEB: { "FilterName": "writer_web_pdf_Export" },
FAMILY_SPREADSHEET: { "FilterName": "calc_pdf_Export" },
FAMILY_PRESENTATION: { "FilterName": "impress_pdf_Export" },
FAMILY_DRAWING: { "FilterName": "draw_pdf_Export" }
},
"html": {
FAMILY_TEXT: { "FilterName": "HTML (StarWriter)" },
FAMILY_SPREADSHEET: { "FilterName": "HTML (StarCalc)" },
FAMILY_PRESENTATION: { "FilterName": "impress_html_Export" }
},
"odt": {
FAMILY_TEXT: { "FilterName": "writer8" },
FAMILY_WEB: { "FilterName": "writerweb8_writer" }
},
"doc": {
FAMILY_TEXT: { "FilterName": "MS Word 97" }
},
"rtf": {
FAMILY_TEXT: { "FilterName": "Rich Text Format" }
},
"txt": {
FAMILY_TEXT: {
"FilterName": "Text",
"FilterOptions": "utf8"
}
},
"ods": {
FAMILY_SPREADSHEET: { "FilterName": "calc8" }
},
"xls": {
FAMILY_SPREADSHEET: { "FilterName": "MS Excel 97" }
},
"csv": {
FAMILY_SPREADSHEET: {
"FilterName": "Text - txt - csv (StarCalc)",
"FilterOptions": "44,34,0"
}
},
"odp": {
FAMILY_PRESENTATION: { "FilterName": "impress8" }
},
"ppt": {
FAMILY_PRESENTATION: { "FilterName": "MS PowerPoint 97" }
},
"swf": {
FAMILY_DRAWING: { "FilterName": "draw_flash_Export" },
FAMILY_PRESENTATION: { "FilterName": "impress_flash_Export" }
}
}

PAGE_STYLE_OVERRIDE_PROPERTIES = {
FAMILY_SPREADSHEET: {
#--- Scale options: uncomment 1 of the 3 ---
# a) 'Reduce / enlarge printout': 'Scaling factor'
"PageScale": 100,
# b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
#"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
# c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
#"ScaleToPages": 1,
"PrintGrid": False
}
}

#-------------------#
# Configuration End #
#-------------------#

class DocumentConversionException(Exception):

def __init__(self, message):
self.message = message

def __str__(self):
return self.message


class DocumentConverter:

def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
localContext = uno.getComponentContext()
resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
try:
context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
except NoConnectException:
raise DocumentConversionException, "failed to connect to OpenOffice.org on port %s" % port
self.desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context)

def convert(self, inputFile, outputFile):

inputUrl = self._toFileUrl(inputFile)
outputUrl = self._toFileUrl(outputFile)

loadProperties = { "Hidden": True }
inputExt = self._getFileExt(inputFile)
if IMPORT_FILTER_MAP.has_key(inputExt):
loadProperties.update(IMPORT_FILTER_MAP[inputExt])

document = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self._toProperties(loadProperties))
try:
document.refresh()
except AttributeError:
pass

family = self._detectFamily(document)
self._overridePageStyleProperties(document, family)

outputExt = self._getFileExt(outputFile)
storeProperties = self._getStoreProperties(document, outputExt)

try:
document.storeToURL(outputUrl, self._toProperties(storeProperties))
finally:
document.close(True)

def _overridePageStyleProperties(self, document, family):
if PAGE_STYLE_OVERRIDE_PROPERTIES.has_key(family):
properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
pageStyles = document.getStyleFamilies().getByName('PageStyles')
for styleName in pageStyles.getElementNames():
pageStyle = pageStyles.getByName(styleName)
for name, value in properties.items():
pageStyle.setPropertyValue(name, value)

def _getStoreProperties(self, document, outputExt):
family = self._detectFamily(document)
try:
propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
except KeyError:
raise DocumentConversionException, "unknown output format: '%s'" % outputExt
try:
return propertiesByFamily[family]
except KeyError:
raise DocumentConversionException, "unsupported conversion: from '%s' to '%s'" % (family, outputExt)

def _detectFamily(self, document):
if document.supportsService("com.sun.star.text.WebDocument"):
return FAMILY_WEB
if document.supportsService("com.sun.star.text.GenericTextDocument"):
# must be TextDocument or GlobalDocument
return FAMILY_TEXT
if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
return FAMILY_SPREADSHEET
if document.supportsService("com.sun.star.presentation.PresentationDocument"):
return FAMILY_PRESENTATION
if document.supportsService("com.sun.star.drawing.DrawingDocument"):
return FAMILY_DRAWING
raise DocumentConversionException, "unknown document family: %s" % document

def _getFileExt(self, path):
ext = splitext(path)[1]
if ext is not None:
return ext[1:].lower()

def _toFileUrl(self, path):
return uno.systemPathToFileUrl(abspath(path))

def _toProperties(self, dict):
props = []
for key in dict:
prop = PropertyValue()
prop.Name = key
prop.Value = dict[key]
props.append(prop)
return tuple(props)


if __name__ == "__main__":
from sys import argv, exit

if len(argv) < 3:
print "USAGE: python %s <input-file> <output-file>" % argv[0]
exit(255)
if not isfile(argv[1]):
print "no such input file: %s" % argv[1]
exit(1)

try:
converter = DocumentConverter()
converter.convert(argv[1], argv[2])
except DocumentConversionException, exception:
print "ERROR! " + str(exception)
exit(1)
except ErrorCodeIOException, exception:
print "ERROR! ErrorCodeIOException %d" % exception.ErrCode
exit(1)

将文件保存到program目录下面,然后就可以执行转换命令了:

[root@instance-32spzihn program]# ./python topdf.py zzz.docx zzz_1.pdf

同样的命令也可以转换其它格式的文件,如:

[root@instance-32spzihn program]# ./python topdf.py zzz.docx zzz_1.html

相对来说,libreoffice实现起来要比openoffice要简单点,转换效果差不多,若转换中文文档乱码,请参数libreoffice的字体部分。