Merge remote-tracking branch 'origin/main' into feat-multiple-ocr-engines

This commit is contained in:
Michele Dolfi 2024-10-08 10:55:08 +02:00
commit 67746044a9
3 changed files with 12 additions and 4 deletions

View File

@@ -1,3 +1,9 @@
## [v1.18.0](https://github.com/DS4SD/docling/releases/tag/v1.18.0) - 2024-10-03
### Feature
* New torch-based docling models ([#120](https://github.com/DS4SD/docling/issues/120)) ([`2422f70`](https://github.com/DS4SD/docling/commit/2422f706a1b02a679bcbaaba097fef2f69aba0f4))
## [v1.17.0](https://github.com/DS4SD/docling/releases/tag/v1.17.0) - 2024-10-03 ## [v1.17.0](https://github.com/DS4SD/docling/releases/tag/v1.17.0) - 2024-10-03
### Feature ### Feature

View File

@@ -199,9 +199,6 @@ class DocumentConverter:
end_pb_time = time.time() - start_pb_time end_pb_time = time.time() - start_pb_time
_log.info(f"Finished converting page batch time={end_pb_time:.3f}") _log.info(f"Finished converting page batch time={end_pb_time:.3f}")
# Free up mem resources of PDF backend
in_doc._backend.unload()
conv_res.pages = all_assembled_pages conv_res.pages = all_assembled_pages
self._assemble_doc(conv_res) self._assemble_doc(conv_res)
@@ -227,6 +224,11 @@ class DocumentConverter:
f"{trace}" f"{trace}"
) )
finally:
# Always unload the PDF backend, even in case of failure
if in_doc._backend:
in_doc._backend.unload()
end_doc_time = time.time() - start_doc_time end_doc_time = time.time() - start_doc_time
_log.info( _log.info(
f"Finished converting document time-pages={end_doc_time:.2f}/{in_doc.page_count}" f"Finished converting document time-pages={end_doc_time:.2f}/{in_doc.page_count}"

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "docling" name = "docling"
version = "1.17.0" # DO NOT EDIT, updated automatically version = "1.18.0" # DO NOT EDIT, updated automatically
description = "Docling PDF conversion package" description = "Docling PDF conversion package"
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"] authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
license = "MIT" license = "MIT"