import os
import PyPDF2
import sys
import comtypes.client

SUPPORTED_FORMATS = ['pdf', 'doc', 'docx']
WORD_FORMATS = ['doc', 'docx']

# Word's WdSaveFormat value passed to SaveAs to request PDF output.
wdFormatPDF = 17
# Word's WdSaveOptions value: close the document without saving changes.
wdDoNotSaveChanges = 0


def _is_word_document(filename):
    """Return True if *filename* has a Word extension and is not a "~" file.

    The extension is taken with os.path.splitext, so names without any
    extension or with several dots ("report.v2.docx") are handled correctly.
    Files whose name starts with "~" are skipped.
    """
    extension = os.path.splitext(filename)[1][1:].lower()
    return extension in WORD_FORMATS and not filename.startswith("~")


def _pdf_path_for(path):
    """Return *path* with its final extension replaced by ".pdf".

    Only the last extension is replaced; dots elsewhere in the path
    (for example in directory names) are left intact.
    """
    return os.path.splitext(path)[0] + ".pdf"


# Collect every Word document below the current working directory.
f_list = []
for dirpath, subdirs, files in os.walk("."):
    for f in files:
        if _is_word_document(f):
            f_list.append(os.path.join(dirpath, f))

# The conversion below drives Word through COM. If a previous run left a Word
# instance behind (usually because of an unattended Word prompt), the
# operating-system file lock stays on the documents and the next run fails.
# Force-killing WINWORD.exe before starting clears those stale instances.
# NOTE: this terminates every running WINWORD.exe process.
os.system('taskkill /IM "WINWORD.exe" /F')

if f_list:
    # One Word instance is reused for all files instead of one per file.
    word = comtypes.client.CreateObject('Word.Application')
    try:
        word.Visible = True
        for f in f_list:
            in_file = os.path.abspath(f)
            out_file = _pdf_path_for(in_file)
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(out_file, FileFormat=wdFormatPDF)
            finally:
                # Always release the document so its file lock is dropped,
                # even when the export fails.
                doc.Close(wdDoNotSaveChanges)
    finally:
        # Always shut the Word instance down, even when a conversion fails.
        word.Quit()

# Safety net kept from the original script: remove any Word process that
# did not exit cleanly.
os.system('taskkill /IM "WINWORD.exe" /F')

# Other Notes
import docx # pip install python-docx # Does not support .pdf and .doc
Wednesday, September 1, 2021
Convert MS Word files into PDF format using Python on Windows
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment