mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
fix: skip temporary docx files (#2413)
fix: CLI detects docx temporary files and breaks

Signed-off-by: Victor Moreli <victormoreli64@gmail.com>
This commit is contained in:
@@ -547,13 +547,25 @@ def convert( # noqa: C901
|
|||||||
if local_path.exists() and local_path.is_dir():
|
if local_path.exists() and local_path.is_dir():
|
||||||
for fmt in from_formats:
|
for fmt in from_formats:
|
||||||
for ext in FormatToExtensions[fmt]:
|
for ext in FormatToExtensions[fmt]:
|
||||||
input_doc_paths.extend(
|
for path in local_path.glob(f"**/*.{ext}"):
|
||||||
list(local_path.glob(f"**/*.{ext}"))
|
if path.name.startswith("~$") and ext == "docx":
|
||||||
|
_log.info(
|
||||||
|
f"Ignoring temporary Word file: {path}"
|
||||||
)
|
)
|
||||||
input_doc_paths.extend(
|
continue
|
||||||
list(local_path.glob(f"**/*.{ext.upper()}"))
|
input_doc_paths.append(path)
|
||||||
|
|
||||||
|
for path in local_path.glob(f"**/*.{ext.upper()}"):
|
||||||
|
if path.name.startswith("~$") and ext == "docx":
|
||||||
|
_log.info(
|
||||||
|
f"Ignoring temporary Word file: {path}"
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
|
input_doc_paths.append(path)
|
||||||
elif local_path.exists():
|
elif local_path.exists():
|
||||||
|
if not local_path.name.startswith("~$") and ext == "docx":
|
||||||
|
_log.info(f"Ignoring temporary Word file: {path}")
|
||||||
|
continue
|
||||||
input_doc_paths.append(local_path)
|
input_doc_paths.append(local_path)
|
||||||
else:
|
else:
|
||||||
err_console.print(
|
err_console.print(
|
||||||
|
|||||||
Reference in New Issue
Block a user