import os
import sys

import pdfkit
from win32com import client
# Convert a .doc file to PDF.
def doc2pdf(fn):
    """Save the Word document at ``fn`` as a PDF next to the original.

    The PDF keeps the source file's name with its extension replaced by
    ``.pdf``.

    Args:
        fn: Path of the Word document to convert.

    Raises:
        Any exception raised by the Word COM automation calls is propagated
        after the document and the Word application have been closed.
    """
    # Word runs as a separate process and does not share this script's
    # working directory, so hand it absolute paths only.
    src_path = os.path.abspath(fn)
    # splitext drops whatever extension is present instead of assuming that
    # it is exactly four characters long.
    pdf_path = os.path.splitext(src_path)[0] + ".pdf"

    word = client.Dispatch("Word.Application")  # open the Word application
    try:
        doc = word.Documents.Open(src_path)  # open the Word file
        try:
            doc.SaveAs(pdf_path, 17)  # 17 selects the PDF save format
        finally:
            doc.Close()  # close the source document even if saving failed
    finally:
        # Always shut Word down so a failed conversion does not leave the
        # application running in the background.
        word.Quit()
# Convert a .docx file to PDF.
def docx2pdf(fn):
    """Save the Word document at ``fn`` as a PDF next to the original.

    The PDF keeps the source file's name with its extension replaced by
    ``.pdf``.

    Args:
        fn: Path of the Word document to convert.

    Raises:
        Any exception raised by the Word COM automation calls is propagated
        after the document and the Word application have been closed.
    """
    # Word runs as a separate process and does not share this script's
    # working directory, so hand it absolute paths only.
    src_path = os.path.abspath(fn)
    # splitext drops whatever extension is present instead of assuming that
    # it is exactly five characters long.
    pdf_path = os.path.splitext(src_path)[0] + ".pdf"

    word = client.Dispatch("Word.Application")  # open the Word application
    try:
        doc = word.Documents.Open(src_path)  # open the Word file
        try:
            doc.SaveAs(pdf_path, 17)  # 17 selects the PDF save format
        finally:
            doc.Close()  # close the source document even if saving failed
    finally:
        # Always shut Word down so a failed conversion does not leave the
        # application running in the background.
        word.Quit()
def convert_doc_to_pdf(path):
    """Convert every Word document under ``path`` to PDF, then delete it.

    The directory tree is walked recursively. Files ending in ``.doc`` are
    passed to ``doc2pdf`` and files ending in ``.docx`` to ``docx2pdf``; the
    extension is matched case-insensitively. A source file is deleted only
    after its PDF is confirmed to exist on disk.

    Args:
        path: Root directory to scan. If it is not a directory, a message is
            printed and nothing else happens.

    Raises:
        Any exception raised by the converter or by ``os.remove`` propagates
        and stops the walk.
    """
    if not os.path.isdir(path):
        print(f"Path provided is not a directory: {path}")
        return

    for root, _dirs, files in os.walk(path):
        for file in files:
            lowered = file.lower()
            # Test ".docx" first only for readability; ".doc" cannot match a
            # name that ends in ".docx".
            if lowered.endswith('.docx'):
                suffix_len = len('.docx')
            elif lowered.endswith('.doc'):
                suffix_len = len('.doc')
            else:
                continue

            doc_path = os.path.join(root, file)
            print(doc_path)
            if suffix_len == len('.doc'):
                doc2pdf(doc_path)
            else:
                docx2pdf(doc_path)

            # The converters write the PDF beside the source, with the
            # extension swapped for ".pdf".
            pdf_path = doc_path[:-suffix_len] + '.pdf'
            if not os.path.isfile(pdf_path):
                # Never delete the only copy when no PDF was produced.
                print(f"PDF was not created, keeping original: {doc_path}")
                continue

            os.remove(doc_path)
            print('{}_已完成转换'.format(file))
def html_to_pdf(path):
    """Convert every HTML file under ``path`` to PDF, then delete it.

    The directory tree is walked recursively. Each file whose name ends in
    ``.html`` (any letter case) is rendered with ``pdfkit.from_file`` to a
    PDF beside it, with the extension replaced by ``.pdf``. The HTML file is
    deleted only after the PDF is confirmed to exist on disk.

    Args:
        path: Root directory to scan. If it is not a directory, a message is
            printed and nothing else happens.

    Raises:
        OSError: Re-raised from ``pdfkit.from_file`` unless its message
            contains ``ProtocolUnknownError``.
    """
    if not os.path.isdir(path):
        print(f"Path provided is not a directory: {path}")
        return

    for root, _dirs, files in os.walk(path):
        for file in files:
            if not file.lower().endswith('.html'):
                continue

            html_path = os.path.join(root, file)
            pdf_path = '{}.pdf'.format(html_path[:-len('.html')])
            try:
                pdfkit.from_file(html_path, pdf_path)
            except OSError as e:
                # ProtocolUnknownError is deliberately tolerated; presumably
                # the renderer reports it for unloadable resources while
                # still writing the PDF -- TODO confirm.
                if 'ProtocolUnknownError' not in str(e):
                    raise

            if not os.path.isfile(pdf_path):
                # A tolerated error may have produced no output; never
                # delete the only copy in that case.
                print(f"PDF was not created, keeping original: {html_path}")
                continue

            os.remove(html_path)
            print('{}_已完成转换'.format(html_path))
if __name__ == '__main__':
    # Root folder to process: the first command-line argument when given,
    # otherwise the original hard-coded location.
    path = sys.argv[1] if len(sys.argv) > 1 else r'D:\workspace\旧公文系统'
    # Word documents first, then HTML files, over the same tree.
    convert_doc_to_pdf(path)
    html_to_pdf(path)