首页 > 代码库 > python 将word另存为txt

python 将word另存为txt

 
import os
import os.path
from win32com import client as wc

# Paths of every file found under the root directories.
c = []
# Directory tree used for this experiment.
rootdir = ["d:/77"]


def txt(j, c):
    """Save the Word document ``c[j]`` as text and delete the original.

    Args:
        j: Index into *c* of the document to convert.
        c: List of file paths; ``c[j]`` must end in a five-character
            extension such as ``.docx``, which is stripped to build the
            output name.

    The output name is the source path without its extension plus the
    suffix ``"(translate txt)"``. The source file is removed only after
    the save and the Word shutdown have completed without raising.
    """
    source = c[j]
    newname = source[:-5] + "(translate txt)"

    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(source)
        try:
            # 4 is the save-format code passed to Word (wdFormatDOSText).
            doc.SaveAs(newname, 4)
        finally:
            # Close the document even if the save failed.
            doc.Close()
    finally:
        # Always shut Word down so no background instance is left running.
        word.Quit()

    os.remove(source)
    print("完成")


def wordt(c):
    """Convert every entry of *c* whose name ends in ``.docx``.

    Args:
        c: List of file paths; entries with any other ending are skipped.
    """
    for j, path in enumerate(c):
        if path.endswith(".docx"):
            txt(j, c)


# Collect every file below each root directory.
for i in rootdir:
    for parent, _dirnames, filenames in os.walk(i):
        for filename in filenames:
            c.append(os.path.join(parent, filename))

# Convert once the walk has finished, so each file is handled a single time
# and the tree is not modified while it is being traversed.
wordt(c)

将 docx 另存为 txt，并且删除源文件

涉及到 office 中 docx 文档的打开与另存为命令

相关参考

from win32com import client as wc

# Minimal example: open a document in Word through COM and save it as text.
word = wc.Dispatch('Word.Application')
doc = word.Documents.Open('c:/test')
# The second argument is the numeric save-format code (2 is wdFormatText).
doc.SaveAs('c:/test.text', 2)
doc.Close()
word.Quit()


# Opens the file for reading; the returned file object is not kept.
open(r'c:\text', 'r')

# Numeric save-format codes accepted as the second argument of doc.SaveAs.
wdFormatDocument = 0
wdFormatDocument97 = 0
wdFormatDocumentDefault = 16
wdFormatDOSText = 4
wdFormatDOSTextLineBreaks = 5
wdFormatEncodedText = 7
wdFormatFilteredHTML = 10
wdFormatFlatXML = 19
wdFormatFlatXMLMacroEnabled = 20
wdFormatFlatXMLTemplate = 21
wdFormatFlatXMLTemplateMacroEnabled = 22
wdFormatHTML = 8
wdFormatPDF = 17
wdFormatRTF = 6
wdFormatTemplate = 1
wdFormatTemplate97 = 1
wdFormatText = 2
wdFormatTextLineBreaks = 3
wdFormatUnicodeText = 7
wdFormatWebArchive = 9
wdFormatXML = 11
wdFormatXMLDocument = 12
wdFormatXMLDocumentMacroEnabled = 13
wdFormatXMLTemplate = 14
wdFormatXMLTemplateMacroEnabled = 15
wdFormatXPS = 18

over!

python 将word另存为txt