From b8796e6705c21fd1afc88cb4d0b988654e078cf9 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 23 Oct 2024 06:20:07 +0200 Subject: [PATCH] updated the pyproject (still need to run poetry lock after docling-parse is accepted) Signed-off-by: Peter Staar --- docling/backend/docling_parse_v2_backend.py | 14 +++++++++++--- pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docling/backend/docling_parse_v2_backend.py b/docling/backend/docling_parse_v2_backend.py index bfc68f9b..d1b7766d 100644 --- a/docling/backend/docling_parse_v2_backend.py +++ b/docling/backend/docling_parse_v2_backend.py @@ -26,9 +26,9 @@ class DoclingParseV2PageBackend(PdfPageBackend): self._ppage = page_obj parsed_page = parser.parse_pdf_from_key_on_page(document_hash, page_no) - self.valid = "pages" in parsed_page + self.valid = "pages" in parsed_page and len(parsed_page["pages"])==1 if self.valid: - self._dpage = parsed_page["pages"][page_no] + self._dpage = parsed_page["pages"][0] else: _log.info( f"An error occured when loading page {page_no} of document {document_hash}." @@ -223,8 +223,16 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend): ) def page_count(self) -> int: - return len(self._pdoc) # To be replaced with docling-parse API + #return len(self._pdoc) # To be replaced with docling-parse API + + len_1 = len(self._pdoc) + len_2 = self.parser.number_of_pages(self.document_hash) + + if len_1!=len_2: + _log.error(f"Inconsistent number of pages: {len_1}!={len_2}") + return len_2 + def load_page(self, page_no: int) -> DoclingParseV2PageBackend: return DoclingParseV2PageBackend( self.parser, self.document_hash, page_no, self._pdoc[page_no] diff --git a/pyproject.toml b/pyproject.toml index db208af9..13ac6f66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ huggingface_hub = ">=0.23,<1" requests = "^2.32.3" easyocr = "^1.7" tesserocr = { version = "^2.7.1", optional = true } -docling-parse = "^1.6.0" +docling-parse = "^2.0.0" certifi = ">=2024.7.4" rtree = "^1.3.0" scipy = "^1.14.1"