import os
import PyPDF2
import sys
import comtypes.client
# Extensions (lower-case, no leading dot) the script declares as supported.
# NOTE(review): SUPPORTED_FORMATS is not referenced in this part of the script.
SUPPORTED_FORMATS = ['pdf', 'doc', 'docx']
# Extensions that the directory scan below treats as Word documents.
WORD_FORMATS = ['doc', 'docx']


def is_word_document(filename):
    """Return True when *filename* names a Word document worth collecting.

    The decision is based on the final extension only, compared
    case-insensitively against WORD_FORMATS, so "report.v2.docx" and
    "REPORT.DOCX" match while "archive.doc.pdf" and extension-less names
    such as "README" do not.  Names starting with "~" are rejected
    (presumably Word's temporary "~$..." owner files).

    Args:
        filename: Bare file name (no directory part).

    Returns:
        bool: True if the file should be added to the conversion list.
    """
    if filename.startswith("~"):
        return False
    # splitext yields "" for names without an extension, so there is no
    # IndexError and only the last dotted component is considered.
    extension = os.path.splitext(filename)[1]
    return extension[1:].lower() in WORD_FORMATS


def find_word_documents(root="."):
    """Walk *root* recursively and return the paths of all Word documents.

    Args:
        root: Directory to scan; defaults to the current directory.

    Returns:
        list[str]: Paths (joined onto the walked directory) of every file
        accepted by is_word_document().  Empty if *root* does not exist.
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(root)
        for name in names
        if is_word_document(name)
    ]


# Paths of every Word document found below the current working directory.
f_list = find_word_documents(".")
"""
The following code converts "doc" and "docx" files to "pdf". If Word is not shut down cleanly after opening a file
(usually because of an unattended Word prompt), the files are not closed properly and the operating system file lock
remains open on them. The code then runs once but not a second time unless the "Word" program instances are ended
from the Task Manager, which is why the script force-kills them with "taskkill".
"""
# Convert every path in f_list to a PDF next to the original, driving Word
# through COM automation.

# Force-kill any Word instance left over from a previous (possibly aborted)
# run, so that stale file locks do not block this run.
os.system('taskkill /IM "WINWORD.exe" /F')

# Word automation constants (WdSaveFormat / WdSaveOptions enumerations).
wdFormatPDF = 17
wdDoNotSaveChanges = 0

if f_list:
    # One Word instance serves all documents instead of one per file.
    word = comtypes.client.CreateObject('Word.Application')
    try:
        word.Visible = True
        for f in f_list:
            in_file = os.path.abspath(f)
            # splitext strips only the final extension; splitting on the
            # first "." would truncate paths such as C:\Users\john.doe\a.docx.
            out_file = os.path.splitext(in_file)[0] + ".pdf"
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(out_file, FileFormat=wdFormatPDF)
            finally:
                # Close without saving so Word releases the file lock and
                # never shows a "save changes?" prompt.
                doc.Close(SaveChanges=wdDoNotSaveChanges)
    finally:
        # Always shut Word down, even when a conversion failed.
        word.Quit()

# Safety net: remove any Word process that survived the clean shutdown.
os.system('taskkill /IM "WINWORD.exe" /F')
# Other notes:
import docx
# pip install python-docx
# Does not support .pdf and .doc
# Pages
# ▼
# No comments:
# Post a Comment