之前使用的ole的方法,doctotxt,在通过另存为的方式处理word的时候,保存的格式是ANSI的,就会导致韩文等字符变成?,丢失数据。
上网查询(用的百度),绞尽脑汁,黔驴技穷,山穷水尽,一筹莫展,崩溃了
上午的时候,实在没有办法了,考虑用c#去实现,然后在c里调用c#的程序(DLL)
今天下午继续看文档 http://msdn.microsoft.com/en-us/library/Aa155776#offaut_creautclicplus
有点灵感,对msword.cpp进行了修改
// Fetches the Selection's value through the dispatch wrapper and hands it back
// as a raw VARIANT instead of converting it to a narrow string, so characters
// outside the ANSI code page (e.g. Korean) are not replaced by '?'.
//
// result: not used by this implementation; the parameter is kept only so the
//         signature stays unchanged.
// Returns the VARIANT filled in by InvokeHelper.
// NOTE(review): presumably the caller is expected to VariantClear() the
// returned value - confirm against the call sites.
VARIANT Selection::GetText(char* result)
{
    VARIANT textValue;

    // Property-get on DISPID 0x0, asking for the result as a VARIANT.
    InvokeHelper(0x0,
                 DISPATCH_PROPERTYGET,
                 VT_VARIANT,
                 static_cast<void*>(&textValue),
                 NULL);

    return textValue;
}
调试了下,发现可以显示韩文了,终于有了突破,这种方法可以解决问题了,这时是3,4点钟
继续看文档,搜wdFormatUnicodeText
接着,想到了用Google去搜,这个时候,有了重大突破,
还是Google好啊,一招制敌,泪流满面,相见恨晚。
唉,竟然忘记了:搜技术问题一定要用Google。教训啊,走了这么多的弯路。
Google上面有老外的提问和解答,找到了点线索
http://stackoverflow.com/questions/11736327/save-every-page-of-a-word-document-into-a-txt-file-utf-8-using-vba
nDoc.SaveAs Encoding:=msoEncodingUTF8, FileFormat:=wdFormatUnicodeText
知道了要用Encoding这个参数
看到了saveas的参数
public virtual void SaveAs ([OptionalAttribute] ref Object FileName,[OptionalAttribute] ref Object FileFormat,[OptionalAttribute] ref Object LockComments,[OptionalAttribute] ref Object Password,[OptionalAttribute] ref Object AddToRecentFiles,[OptionalAttribute] ref Object WritePassword,[OptionalAttribute] ref Object ReadOnlyRecommended,[OptionalAttribute] ref Object EmbedTrueTypeFonts,[OptionalAttribute] ref Object SaveNativePictureFormat,[OptionalAttribute] ref Object SaveFormsData,[OptionalAttribute] ref Object SaveAsAOCELetter,[OptionalAttribute] ref Object Encoding,[OptionalAttribute] ref Object InsertLineBreaks,[OptionalAttribute] ref Object AllowSubstitutions,[OptionalAttribute] ref Object LineEnding,[OptionalAttribute] ref Object AddBiDiMarks)
知道了在程序中需要再添加一个参数(Encoding);看文档的时候还知道了 DISPPARAMS 里的参数值要按相反的顺序填写(最后一个参数放在下标 0),
再搜Const msoEncodingUTF8 = 65001 ,知道了这个值是65001
然后修改程序
//modified by Sunday 2013-4-14 VARIANT vOpt; vOpt.vt = VT_ERROR; vOpt.scode = DISP_E_PARAMNOTFOUND; //Prepare arguments for save as .txt VARIANT vArgsSaveAs[12]; DISPPARAMS dpSaveAs; dpSaveAs.cArgs = 12; dpSaveAs.cNamedArgs = 0; dpSaveAs.rgvarg = vArgsSaveAs; vArgsSaveAs[11].vt = VT_BSTR; vArgsSaveAs[11].bstrVal = bstrSaveFile; //Filename vArgsSaveAs[10].vt = VT_I4;//VT_I4; vArgsSaveAs[10].lVal = 7;//wdFormatUnicodeText;// 7;//2; //FileFormat(wdFormatText = 2) vArgsSaveAs[9] = vFalse; //LockComments vArgsSaveAs[8].vt = VT_BSTR; vArgsSaveAs[8].bstrVal = m_bstrEmptyString; //Password vArgsSaveAs[7].vt = VT_BOOL; vArgsSaveAs[7].boolVal = TRUE; //AddToRecentFiles vArgsSaveAs[6].vt = VT_BSTR; vArgsSaveAs[6].bstrVal = m_bstrEmptyString; //WritePassword vArgsSaveAs[5] = vFalse; //ReadOnlyRecommended vArgsSaveAs[4] = vFalse; //EmbedTrueTypeFonts vArgsSaveAs[3] = vFalse; //SaveNativePictureFormat vArgsSaveAs[2] = vFalse; //SaveFormsData vArgsSaveAs[1] = vFalse; //SaveAsOCELetter vArgsSaveAs[0].vt = VT_I4; vArgsSaveAs[0].lVal = 65001;//UTF8
调试,激动人心的时刻
打开记事本,内容为
Today is 2012-11-12 왜 한국 사람测1试 A B C ffffffffffffffff顶顶顶顶顶顶顶顶kkkkkkkkkkk ffffffffffffffff顶顶顶顶顶顶顶顶kkkkkkkkkkk ffffff顶顶顶顶顶顶顶顶顶顶顶顶顶顶fffffffffffffffff gggggggggg4-13
终于解决了,
这是人类的一小步,是我的一大步
这时是5点多,打开窗户,伸出头去,吹吹风,春风,看看外面的世界,
这2天都没有怎么出门,极度烦闷,
Ag 5.18 Au 295 ,事已至此,不能惊慌
完整代码
void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile) { // ******************* Declare Some Variables ******************** // Variables that will be used and re-used in our calls DISPPARAMS m_dpNoArgs = {NULL, NULL, 0, 0}; VARIANT m_vResult; OLECHAR FAR* m_szFunction; // IDispatch pointers for Word's objects IDispatch* m_pDispDocs; //Documents collection IDispatch* m_pDispActiveDoc; //ActiveDocument object // DISPID's DISPID m_dispid_Docs; //Documents property of Application object DISPID m_dispid_ActiveDoc; //ActiveDocument property of Application DISPID m_dispid_SaveAs; //SaveAs method of the Document object DISPID m_dispid_Quit; //Quit method of the Application object DISPID m_dispid_Open; //Open method of the Application object BSTR m_bstrEmptyString ; // ******************** Start Automation *********************** //Initialize the COM libraries ::CoInitialize(NULL); // Create an instance of the Word application and obtain the pointer // to the application's IDispatch interface. CLSID m_clsid; CLSIDFromProgID(L"Word.Application.12", &m_clsid); IUnknown* m_pUnk; HRESULT m_hr = ::CoCreateInstance( m_clsid, NULL, CLSCTX_SERVER, IID_IUnknown, (void**) &m_pUnk); IDispatch* m_pDispApp; m_hr = m_pUnk->QueryInterface( IID_IDispatch, (void**)&m_pDispApp); // Get IDispatch* for the Documents collection object m_szFunction = OLESTR("Documents"); m_hr = m_pDispApp->GetIDsOfNames (IID_NULL, &m_szFunction, 1, LOCALE_USER_DEFAULT, &m_dispid_Docs); m_hr = m_pDispApp->Invoke (m_dispid_Docs, IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET, &m_dpNoArgs, &m_vResult, NULL, NULL); m_pDispDocs = m_vResult.pdispVal; // Query id of method open m_szFunction = OLESTR("Open"); m_hr = m_pDispDocs->GetIDsOfNames(IID_NULL, &m_szFunction,1, LOCALE_USER_DEFAULT, &m_dispid_Open); // Prepare parameters for open method //modified by Sunday 2013-4-14 VARIANT vArgsOpen[6]; DISPPARAMS dpOpen; dpOpen.cArgs = 6; dpOpen.cNamedArgs = 0; dpOpen.rgvarg = vArgsOpen; VARIANT vFalse, vTrue; vFalse.vt = 
VT_BOOL; vFalse.boolVal = FALSE; vTrue.vt = VT_BOOL; vTrue.boolVal = TRUE; m_bstrEmptyString = ::SysAllocString(OLESTR("")) ; vArgsOpen[5].vt = VT_BSTR; vArgsOpen[5].bstrVal = bstrOpenFile; vArgsOpen[4] = vFalse; vArgsOpen[3] = vTrue; vArgsOpen[2] = vFalse; vArgsOpen[1].vt = VT_BSTR; vArgsOpen[1].bstrVal = m_bstrEmptyString; vArgsOpen[0].vt = VT_BSTR; vArgsOpen[0].bstrVal = m_bstrEmptyString; //Execute open method m_hr=m_pDispDocs->Invoke(m_dispid_Open,IID_NULL,LOCALE_USER_DEFAULT, DISPATCH_METHOD,&dpOpen,NULL,NULL,NULL); //Query activedocument interface m_szFunction = OLESTR("ActiveDocument"); m_hr = m_pDispApp->GetIDsOfNames(IID_NULL, &m_szFunction, 1, LOCALE_USER_DEFAULT,&m_dispid_ActiveDoc); m_hr = m_pDispApp->Invoke(m_dispid_ActiveDoc,IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_PROPERTYGET, &m_dpNoArgs,&m_vResult,NULL,NULL); m_pDispActiveDoc = m_vResult.pdispVal; //4-14 //modified by Sunday 2013-4-14 VARIANT vOpt; vOpt.vt = VT_ERROR; vOpt.scode = DISP_E_PARAMNOTFOUND; //Prepare arguments for save as .txt VARIANT vArgsSaveAs[12]; DISPPARAMS dpSaveAs; dpSaveAs.cArgs = 12; dpSaveAs.cNamedArgs = 0; dpSaveAs.rgvarg = vArgsSaveAs; vArgsSaveAs[11].vt = VT_BSTR; vArgsSaveAs[11].bstrVal = bstrSaveFile; //Filename vArgsSaveAs[10].vt = VT_I4;//VT_I4; vArgsSaveAs[10].lVal = 7;//wdFormatUnicodeText;// 7;//2; //FileFormat(wdFormatText = 2) vArgsSaveAs[9] = vFalse; //LockComments vArgsSaveAs[8].vt = VT_BSTR; vArgsSaveAs[8].bstrVal = m_bstrEmptyString; //Password vArgsSaveAs[7].vt = VT_BOOL; vArgsSaveAs[7].boolVal = TRUE; //AddToRecentFiles vArgsSaveAs[6].vt = VT_BSTR; vArgsSaveAs[6].bstrVal = m_bstrEmptyString; //WritePassword vArgsSaveAs[5] = vFalse; //ReadOnlyRecommended vArgsSaveAs[4] = vFalse; //EmbedTrueTypeFonts vArgsSaveAs[3] = vFalse; //SaveNativePictureFormat vArgsSaveAs[2] = vFalse; //SaveFormsData vArgsSaveAs[1] = vFalse; //SaveAsOCELetter vArgsSaveAs[0].vt = VT_I4; vArgsSaveAs[0].lVal = 65001;//UTF8 /* //Prepare arguments for save as .txt VARIANT vArgsSaveAs[11]; 
DISPPARAMS dpSaveAs; dpSaveAs.cArgs = 11; dpSaveAs.cNamedArgs = 0; dpSaveAs.rgvarg = vArgsSaveAs; vArgsSaveAs[10].vt = VT_BSTR; vArgsSaveAs[10].bstrVal = bstrSaveFile; //Filename vArgsSaveAs[9].vt = VT_I4;//VT_I4; vArgsSaveAs[9].lVal = 7;//wdFormatUnicodeText;// 7;//2; //FileFormat(wdFormatText = 2) vArgsSaveAs[8] = vFalse; //LockComments vArgsSaveAs[7].vt = VT_BSTR; vArgsSaveAs[7].bstrVal = m_bstrEmptyString; //Password vArgsSaveAs[6].vt = VT_BOOL; vArgsSaveAs[6].boolVal = TRUE; //AddToRecentFiles vArgsSaveAs[5].vt = VT_BSTR; vArgsSaveAs[5].bstrVal = m_bstrEmptyString; //WritePassword vArgsSaveAs[4] = vFalse; //ReadOnlyRecommended vArgsSaveAs[3] = vFalse; //EmbedTrueTypeFonts vArgsSaveAs[2] = vFalse; //SaveNativePictureFormat vArgsSaveAs[1] = vFalse; //SaveFormsData vArgsSaveAs[0] = vFalse; //SaveAsOCELetter */ // Query and execute SaveAs method m_szFunction = OLESTR("SaveAs"); m_hr = m_pDispActiveDoc->GetIDsOfNames(IID_NULL,&m_szFunction,1, LOCALE_USER_DEFAULT,&m_dispid_SaveAs); m_hr = m_pDispActiveDoc->Invoke(m_dispid_SaveAs, IID_NULL, LOCALE_SYSTEM_DEFAULT,DISPATCH_METHOD,//LOCALE_USER_DEFAULT &dpSaveAs,NULL,NULL,NULL); // Invoke the Quit method m_szFunction = OLESTR("Quit"); m_hr = m_pDispApp->GetIDsOfNames(IID_NULL, &m_szFunction, 1, LOCALE_USER_DEFAULT, &m_dispid_Quit); m_hr = m_pDispApp->Invoke(m_dispid_Quit, IID_NULL, LOCALE_USER_DEFAULT, DISPATCH_METHOD, &m_dpNoArgs, NULL, NULL, NULL); // cout << m_bstrEmptyString << endl; cout << (char*)m_bstrEmptyString << endl; //Clean-up ::SysFreeString(bstrOpenFile) ; ::SysFreeString(bstrSaveFile) ; ::SysFreeString(m_bstrEmptyString) ; m_pDispActiveDoc->Release(); m_pDispDocs->Release(); m_pDispApp->Release(); m_pUnk->Release(); ::CoUninitialize(); } int main(int argc, char* argv[]) { DocToTxt(::SysAllocString(OLESTR("D:\\code\\data\\c2.docx")), ::SysAllocString(OLESTR("D:\\to.txt"))); }